To run: Beforehand:

module load pandoc

In R:

setwd("~/shared-gandalm/brain_CTP/Scripts/integration/ctp_differences")
rmarkdown::render("ctp_differences.Rmd", "html_document")

1 Set-up

1.1 Directories and libraries

library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.5     ✔ purrr   0.3.4
## ✔ tibble  3.1.6     ✔ dplyr   1.0.7
## ✔ tidyr   1.1.4     ✔ stringr 1.4.0
## ✔ readr   2.1.0     ✔ forcats 0.5.1
## ── Conflicts ─────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(rmarkdown)    # You need this library to run this template.
library(epuRate)      # Install with remotes::install_github("holtzy/epuRate", force=TRUE) - use this instead of devtools::install_github()
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(compositions)
## Welcome to compositions, a package for compositional data analysis.
## Find an intro with "? compositions"
## 
## Attaching package: 'compositions'
## The following objects are masked from 'package:stats':
## 
##     anova, cor, cov, dist, var
## The following objects are masked from 'package:base':
## 
##     %*%, norm, scale, scale.default
library(wesanderson)

library(rms)
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: SparseM
## 
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
## 
##     backsolve
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'lmtest'
## The following object is masked from 'package:rms':
## 
##     lrtest
library(ashr)
library(mashr)
library(broom)
study <- "Jaffe2018"
data_dir <- paste("~/shared-gandalm/brain_CTP/Data/methylation/", study, "/analysis", sep = "")
filen <- "Jaffe2018_age0"
scz_dir <- paste(data_dir, "/", filen, "_ctp_pheno.txt", sep = "")
scz_adjoligo_dir <- paste(data_dir, "/", "hseq_adjoligo_ctp_pheno.raw.txt", sep = "")

study <- "ASD_methylation_brain"
data_dir <- paste("~/shared-gandalm/brain_CTP/Data/methylation/", study, "/analysis", sep = "")
filen <- "KCL_R01MH094714_ASD_Illumina450K_PFC"
asd_dir <- paste(data_dir, "/", filen, "_ctp_pheno.txt", sep = "")
asd_adjoligo_dir <- paste(data_dir, "/", "hseq_adjoligo_ctp_pheno.raw.txt", sep = "")

study <- "ROSMAP"
data_dir <- paste("~/shared-gandalm/brain_CTP/Data/methylation/", study, "/analysis", sep = "")
#filen <- "ROSMAP_ComBatplate_neg0"
filen <- "ROSMAP"
azd_dir <- paste(data_dir, "/", filen, "_ctp_pheno.txt", sep = "")
azd_adjoligo_dir <- paste(data_dir, "/", "hseq_adjoligo_ctp_pheno.raw.txt", sep = "")

out_dir <- "~/shared-gandalm/brain_CTP/Data/integration/ctp_differences"

1.2 Read-in data

1.2.1 All CTPs

# All CTPs
scz <- read.delim(scz_dir, header = T, as.is = T)
asd <- read.delim(asd_dir, header = T, as.is = T)
azd <- read.delim(azd_dir, header = T, as.is = T)

# Adjust for oligodendrocytes
scz_adjoligo <- read.delim(scz_adjoligo_dir, header = T, as.is = T)
asd_adjoligo <- read.delim(asd_adjoligo_dir, header = T, as.is = T)
azd_adjoligo <- read.delim(azd_adjoligo_dir, header = T, as.is = T)

1.2.1.1 ASD study: subselect for better group comparison

Subselect for better group comparison

  • based on the stats shown, try removing the youngest 12 participants (all ASD) + removing the oldest 6 participants (all CTL), for the purposes of within-study comparisons
  • justification:
    • for LIBD, only took participants older than 18 (since differences in CTPs may be expected in this age group)
    • there are more ASD than CTL participants, so this should help balance the design a bit better
foo <- summary(asd %>% filter(RegionID == "BA9") %>% pull(age))

asd %>% filter(RegionID == "BA9") %>% group_by(group) %>% dplyr::summarise(Q1 = length(which(age >= foo[1] & age <= foo[2])), Q2 = length(which(age > foo[2] & age <= foo[3])), Q3 = length(which(age > foo[3] & age <= foo[5])), Q4 = length(which(age > foo[5] & age <= foo[6])))
## # A tibble: 2 × 5
##   group    Q1    Q2    Q3    Q4
##   <chr> <int> <int> <int> <int>
## 1 ASD      16    13    10     4
## 2 CTL       3     7     8    15
asd %>% arrange(age) %>% dplyr::select(c("IID", "age", "group")) %>% head(20)
##            IID age group
## 1  AN03345_BA9   2   ASD
## 2  UMB5308_BA9   4   ASD
## 3  AN08873_BA9   5   ASD
## 4  AN13872_BA9   5   ASD
## 5  UMB4849_BA9   7   ASD
## 6  AN19511_BA9   8   ASD
## 7  AN16641_BA9   9   ASD
## 8  AN14762_BA9   9   ASD
## 9  AN06365_BA9  10   ASD
## 10 AN09402_BA9  11   ASD
## 11 AN16115_BA9  11   ASD
## 12 AN04682_BA9  15   ASD
## 13 UMB5242_BA9  15   CTL
## 14 AN02987_BA9  15   ASD
## 15 UMB5302_BA9  16   ASD
## 16 AN07444_BA9  17   CTL
## 17 AN01570_BA9  18   ASD
## 18 AN03935_BA9  19   ASD
## 19 AN03217_BA9  19   CTL
## 20 UMB4590_BA9  20   CTL
asd %>% arrange(desc(age)) %>% dplyr::select(c("IID", "age", "group")) %>% head(20)
##             IID age group
## 1  NP115_09_BA9  71   CTL
## 2  NP053_06_BA9  71   CTL
## 3  NP168_08_BA9  71   CTL
## 4  x1148_98_BA9  70   CTL
## 5  x1340_94_BA9  69   CTL
## 6  NP071_10_BA9  68   CTL
## 7   UMB5303_BA9  67   ASD
## 8  x1085_93_BA9  67   CTL
## 9  NP122_11_BA9  65   CTL
## 10 x1127_93_BA9  64   CTL
## 11  NP92_11_BA9  63   CTL
## 12  NP94_11_BA9  62   CTL
## 13  AN10723_BA9  60   CTL
## 14  AN09714_BA9  60   ASD
## 15 x1054_97_BA9  59   CTL
## 16  AN11864_BA9  57   CTL
## 17  NP81_11_BA9  56   CTL
## 18  AN01093_BA9  56   ASD
## 19  AN17515_BA9  54   ASD
## 20  UMB1578_BA9  53   CTL
# Choose to remove the youngest 12 people (all in ASD group)
asd_exclude <- rbind(asd %>% arrange(age) %>% head(12) %>% dplyr::select("IID"), 
                     asd %>% arrange(desc(age)) %>% head(6) %>% dplyr::select("IID"))

write.table(asd_exclude[,c(1,1)], "~/shared-gandalm/brain_CTP/Data/integration/ctp_differences/asd_exclude_studybalance.id", col.names = F, row.names = F, sep = "\t", quote = F)

1.2.1.2 SCZ study: subselect for better group comparison

Subselect for better group comparison

  • take people aged 18+
foo <- summary(scz %>% pull(age))

scz %>% group_by(group) %>% dplyr::summarise(Q1 = length(which(age >= foo[1] & age <= foo[2])), Q2 = length(which(age > foo[2] & age <= foo[3])), Q3 = length(which(age > foo[3] & age <= foo[5])), Q4 = length(which(age > foo[5] & age <= foo[6])))
## # A tibble: 2 × 5
##   group      Q1    Q2    Q3    Q4
##   <chr>   <int> <int> <int> <int>
## 1 Control   107    68    60    54
## 2 SCZ        12    51    58    65
scz %>% arrange(age) %>% dplyr::select(c("IID", "age", "group")) %>% filter(age <= 18)
##                  IID       age   group
## 1   Sample66_Control  0.002739 Control
## 2   Sample67_Control  0.005479 Control
## 3   Sample68_Control  0.068493 Control
## 4   Sample69_Control  0.095890 Control
## 5   Sample70_Control  0.109589 Control
## 6   Sample72_Control  0.202739 Control
## 7   Sample73_Control  0.216438 Control
## 8   Sample75_Control  0.246575 Control
## 9   Sample76_Control  0.249315 Control
## 10  Sample77_Control  0.290410 Control
## 11  Sample79_Control  0.304109 Control
## 12  Sample81_Control  0.326027 Control
## 13  Sample85_Control  0.326027 Control
## 14  Sample88_Control  0.353424 Control
## 15  Sample89_Control  0.364383 Control
## 16  Sample92_Control  0.391780 Control
## 17  Sample94_Control  0.402739 Control
## 18  Sample96_Control  0.408219 Control
## 19  Sample97_Control  0.482191 Control
## 20  Sample98_Control  0.515068 Control
## 21 Sample102_Control  1.339726 Control
## 22 Sample103_Control  1.624657 Control
## 23 Sample104_Control  2.024657 Control
## 24 Sample106_Control  2.490410 Control
## 25 Sample107_Control  2.715068 Control
## 26 Sample109_Control  3.046575 Control
## 27 Sample110_Control  3.980821 Control
## 28 Sample112_Control  4.139726 Control
## 29 Sample114_Control  4.208219 Control
## 30 Sample115_Control  4.652054 Control
## 31 Sample116_Control  4.709589 Control
## 32 Sample117_Control  5.345205 Control
## 33 Sample119_Control  6.339726 Control
## 34 Sample121_Control  8.254794 Control
## 35 Sample122_Control  9.156164 Control
## 36 Sample124_Control 10.726027 Control
## 37 Sample125_Control 12.282191 Control
## 38 Sample127_Control 12.879452 Control
## 39 Sample128_Control 13.032876 Control
## 40 Sample129_Control 13.106849 Control
## 41 Sample130_Control 13.169863 Control
## 42 Sample131_Control 13.336986 Control
## 43 Sample132_Control 13.421917 Control
## 44 Sample133_Control 13.487671 Control
## 45 Sample134_Control 13.698630 Control
## 46 Sample135_Control 13.997260 Control
## 47 Sample136_Control 14.101369 Control
## 48 Sample137_Control 14.553424 Control
## 49 Sample138_Control 14.632876 Control
## 50 Sample139_Control 14.873972 Control
## 51 Sample140_Control 15.128767 Control
## 52 Sample141_Control 15.158904 Control
## 53 Sample142_Control 15.172602 Control
## 54 Sample143_Control 15.232876 Control
## 55 Sample144_Control 15.331506 Control
## 56 Sample145_Control 15.460273 Control
## 57 Sample146_Control 15.493150 Control
## 58 Sample148_Control 15.800000 Control
## 59 Sample151_Control 16.183561 Control
## 60 Sample153_Control 16.353424 Control
## 61 Sample154_Control 16.583561 Control
## 62 Sample156_Control 16.649315 Control
## 63 Sample157_Control 16.695890 Control
## 64 Sample158_Control 16.726027 Control
## 65 Sample159_Control 16.753424 Control
## 66 Sample162_Control 16.800000 Control
## 67 Sample163_Control 16.934246 Control
## 68 Sample165_Control 17.090410 Control
## 69 Sample167_Control 17.219178 Control
## 70 Sample168_Control 17.235616 Control
## 71  Sample169_Schizo 17.238356     SCZ
## 72 Sample171_Control 17.279452 Control
## 73 Sample172_Control 17.764383 Control
# Choose to remove the youngest 12 people (all in ASD group)
scz_exclude <- scz %>% filter(age <= 18) %>% dplyr::select(c("IID"))
scz_exclude$IID <- unlist(lapply(strsplit(scz_exclude$IID, "_"), function(x) x[1]))

write.table(scz_exclude[,c(1,1)], "~/shared-gandalm/brain_CTP/Data/integration/ctp_differences/scz_exclude_studybalance.id", col.names = F, row.names = F, sep = "\t", quote = F)

1.3 Prep to merge

1.3.1 Same names for columns

# Diagnosis variables
scz$dx <- ifelse(scz$group == "SCZ", "SCZ", "CTL")
asd$dx <- ifelse(asd$Diagnosis == "CTL", "CTL", "ASD")
azd$dx <- ifelse(azd$group == "CTL", "CTL", "AZD")

scz_adjoligo$dx <- ifelse(scz_adjoligo$group == "SCZ", "SCZ", "CTL")
asd_adjoligo$dx <- ifelse(asd_adjoligo$group == "CTL", "CTL", "ASD")
azd_adjoligo$dx <- ifelse(azd_adjoligo$group == "CTL", "CTL", "AZD")

# Batch variable (take the batch variable with biggest effect)
scz$batch <- scz$plate
asd$batch <- paste("ASDbrain", asd$Batch, sep = "")
#azd$batch <- azd$Sample_Plate
azd$batch <- paste("ROSMAP", azd$batch, sep = "")

scz_adjoligo$batch <- scz_adjoligo$plate
asd_adjoligo$batch <- asd_adjoligo$Batch
azd_adjoligo$batch <- azd_adjoligo$Sample_Plate

# Study variable
scz$study <- "LIBD"
asd$study <- "ASDbrain"
azd$study <- "ROSMAP"

scz_adjoligo$study <- "LIBD"
asd_adjoligo$study <- "ASDbrain"
azd_adjoligo$study <- "ROSMAP"

# Amend IID for SCZ
scz$IID <- unlist(lapply(strsplit(scz$IID, "_"), function(x) x[1]))
scz_adjoligo$IID <- unlist(lapply(strsplit(scz_adjoligo$IID, "_"), function(x) x[1]))

# Rename cell-types from the deconvolution of interest
colnames(scz) <- gsub("hseq_", "", colnames(scz))
colnames(asd) <- gsub("hseq_", "", colnames(asd))
colnames(azd) <- gsub("hseq_", "", colnames(azd))
colnames(scz_adjoligo) <- gsub("hseq_", "", colnames(scz_adjoligo))
colnames(asd_adjoligo) <- gsub("hseq_", "", colnames(asd_adjoligo))
colnames(azd_adjoligo) <- gsub("hseq_", "", colnames(azd_adjoligo))

# Add age2
asd$age2 <- asd$age^2
scz$age2 <- scz$age^2
azd$age2 <- azd$age^2
asd_adjoligo$age2 <- asd_adjoligo$age^2
scz_adjoligo$age2 <- scz_adjoligo$age^2
azd_adjoligo$age2 <- azd_adjoligo$age^2

1.3.2 Take hseq data

ctp_cols <- c("Exc", "Inh", "Astro", "Endo", "Micro", "Oligo", "OPC", "Neuron", "Glial")
level2_cols <- ctp_cols[-which(ctp_cols %in% c("Neuron", "Glial"))]

glial_rmoligo_n <- level2_cols[-which(level2_cols %in% c("Exc", "Inh", "Oligo"))]
neuron_n <- c("Exc", "Inh")
level2_rmoligo_cols <- c(neuron_n, glial_rmoligo_n)
ctp_rmoligo_cols <- c(level2_rmoligo_cols, "Neuron", "Glial")

keepcol_scz <- c("IID", ctp_cols, "dx", "age", "age2", "sex", "batch", "study")
keepcol_scz_adjoligo <- c("IID", ctp_rmoligo_cols, "dx", "age", "age2", "sex", "batch", "study")
#keepcol_scz <- c("IID", "Exc", "Inh", "NonN_Astro_FGF3R", "NonN_Micro.Endo_TYROBP", "NonN_Oligo_MBP", "group", "age", "sex", "race", "plate")

keepcol_asd <- c("IID", ctp_cols, "dx", "age", "age2", "sex", "batch", "study")
keepcol_asd_adjoligo <- c("IID", ctp_rmoligo_cols, "dx", "age", "age2", "sex", "batch", "study")
#keepcol_asd <- c("IID", "Exc", "Inh", "NonN_Astro_FGF3R", "NonN_Micro.Endo_TYROBP", "NonN_Oligo_MBP", "Diagnosis", "age", "sex", "Batch", "PMI", "BrainBank")

keepcol_azd <- c("IID", ctp_cols, "dx", "age", "age2", "sex", "batch", "study")
keepcol_azd_adjoligo <- c("IID",  ctp_rmoligo_cols, "dx", "age", "age2", "sex", "batch", "study")
#keepcol_azd <- c("IID", "Exc", "Inh", "NonN_Astro_FGF3R", "NonN_Micro.Endo_TYROBP", "NonN_Oligo_MBP", "group", "braaksc", "ceradsc", "cogdx", "dcfdx_lv", "age", "sex", "batch", "Sample_Plate", "study")

1.3.3 Join into 1 dataframe

ctp <- rbind(scz[,keepcol_scz], asd[,keepcol_asd])
ctp <- rbind(ctp, azd[,keepcol_azd])

# Adjust for oligodendrocytes
ctp_adjoligo <- rbind(scz_adjoligo[,keepcol_scz_adjoligo], asd_adjoligo[,keepcol_asd_adjoligo])
ctp_adjoligo <- rbind(ctp_adjoligo, azd_adjoligo[,keepcol_azd_adjoligo])

1.3.4 Stacked bar charts

ctp.tmp <- ctp %>% group_by(dx) %>% dplyr::arrange(Glial, .by_group = TRUE) %>% mutate(plot_id = paste(dx, Glial, sep = ""))
ctp.tmp$plot_id <- paste(ctp.tmp$plot_id, 1:nrow(ctp.tmp), sep = "")
ctp.tmp$plot_id <- ifelse(ctp.tmp$dx != "CTL", paste("a", ctp.tmp$plot_id, sep = ""), ctp.tmp$plot_id)

ctp.long <- melt(ctp.tmp, id.vars = c("IID", "plot_id", "dx", "age", "age2", "sex", "batch", "study", "Neuron", "Glial"), variable.name = "celltype", value.name = "CTP")
ctp.long$celltype <- as.character(ctp.long$celltype)
ctp.long$celltype[grep("NonN", ctp.long$celltype)] <- unlist(lapply(strsplit(ctp.long$celltype[grep("NonN", ctp.long$celltype)], "_"), function(x) x[2]))
# - order
ctp.long$MatchDx <- match(ctp.long$dx, c("CTL", "ASD", "SCZ", "AZD"))
ctp.long$MatchStudy <- match(ctp.long$study, c("ASDbrain", "LIBD", "ROSMAP"))
ctp.long$MatchCell <- match(ctp.long$celltype, unique(ctp.long$celltype))
ctp.long$MatchID <- match(ctp.long$plot_id, ctp.long$plot_id)
ctp.long$group <- ifelse(ctp.long$dx == "CTL", "CTL", "Dx")

# Plot
stackedbar <- ctp.long %>% mutate(dx = fct_reorder(dx, MatchDx)) %>% mutate(study = fct_reorder(study, MatchStudy)) %>% mutate(celltype = fct_reorder(celltype, MatchCell)) %>%
ggplot(aes(x = plot_id, y = CTP, fill = celltype)) + 
  geom_bar(position="stack", stat="identity") +
  scale_fill_manual(values = wes_palette(name = "Zissou1", 7, type = "continuous"), name = "") +
  theme(axis.title.x = element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank()) +
  facet_grid(~ study + dx, scales = "free_x", space = 'free_x')

stackedbar

ggsave(paste(out_dir, "/ctp_raw_stackedbar_ROSMAP_ASDbrain_LIBD.png", sep = ""), stackedbar, height = 8, width = 12)
ggsave(paste(out_dir, "/ctp_raw_stackedbar_ROSMAP_ASDbrain_LIBD.svg", sep = ""), stackedbar, height = 8, width = 12)

1.4 clr transformation

# Keep oligodendrocytes
keep_cols <- c("IID", "dx", "age", "age2", "sex", "batch", "study")
ctp.tmp.clr <- as.matrix(ctp %>% dplyr::select(c(all_of(level2_cols))))

# - Make minimum CTP = 1e-3
length(which(ctp.tmp.clr <= 1e-3)) # check there are no 0s
## [1] 4
ctp.tmp.clr0 <- ctp.tmp.clr
ctp.tmp.clr0[which(ctp.tmp.clr0 <= 1e-3)] <- 1e-3

# - Perform clr-transform
ctp.tmp.clr0.tr <- clr(ctp.tmp.clr0)
ctp.clr.1e3 <- data.frame(ctp[,keep_cols], ctp.tmp.clr0.tr)

# Adjust for oligodendrocytes
keep_cols <- c("IID", "dx", "age", "age2", "sex", "batch", "study")
ctp_adjoligo.tmp.clr <- as.matrix(ctp_adjoligo %>% dplyr::select(c(all_of(neuron_n), all_of(glial_rmoligo_n))))

# Make minimum CTP = 1e-3
length(which(ctp_adjoligo.tmp.clr <= 1e-3)) # check there are no 0s
## [1] 4
ctp_adjoligo.tmp.clr0 <- ctp_adjoligo.tmp.clr
ctp_adjoligo.tmp.clr0[which(ctp_adjoligo.tmp.clr0 <= 1e-3)] <- 1e-3

# Perform clr-transform
ctp_adjoligo.tmp.clr0.tr <- clr(ctp_adjoligo.tmp.clr0)
ctp_adjoligo.clr.1e3 <- data.frame(ctp[,keep_cols], ctp_adjoligo.tmp.clr0.tr)

1.4.1 Add MatchOrder column

ctp$MatchOrder <- match(ctp$dx, c("CTL", "ASD", "SCZ", "AZD"))
ctp_adjoligo$MatchOrder <- match(ctp_adjoligo$dx, c("CTL", "ASD", "SCZ", "AZD"))
ctp.clr.1e3$MatchOrder <- match(ctp.clr.1e3$dx, c("CTL", "ASD", "SCZ", "AZD"))
ctp_adjoligo.clr.1e3$MatchOrder <- match(ctp_adjoligo.clr.1e3$dx, c("CTL", "ASD", "SCZ", "AZD"))

1.5 Write output

write.table(ctp, paste(out_dir, "/ctp_noclr_aggregated_scz_asd_azd.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")

write.table(ctp_adjoligo, paste(out_dir, "/ctp_adjoligo_noclr_aggregated_scz_asd_azd.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")

write.table(ctp.clr.1e3, paste(out_dir, "/ctp_clr1e-3_aggregated_scz_asd_azd.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")

write.table(ctp_adjoligo.clr.1e3, paste(out_dir, "/ctp_adjoligo_clr1e-3_aggregated_scz_asd_azd.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")

1.6 Long format

ctp.long <- melt(ctp, id.vars = c("IID", "dx", "age", "age2", "sex", "batch", "study", "MatchOrder"), variable.name = "celltype", value.name = "CTP")
ctp.long$Level <- ifelse(ctp.long$celltype %in% c("Neuron", "Glial"), "Level1", "Level2")
ctp.long$MatchOrder <- match(ctp.long$dx, c("CTL", "ASD", "SCZ", "AZD"))
ctp.long$MatchCTP <- match(ctp.long$celltype, unique(c(ctp.long %>% filter(Level == "Level1") %>% pull(celltype) %>% as.character(), ctp.long %>% filter(Level == "Level2") %>% pull(celltype) %>% as.character())))
ctp.long$celltype <- as.character(ctp.long$celltype)
ctp.long$celltype[grep("NonN", ctp.long$celltype)] <- unlist(lapply(strsplit(ctp.long$celltype[grep("NonN", ctp.long$celltype)], "_"), function(x) x[2]))

ctp_adjoligo.long <- melt(ctp_adjoligo, id.vars = c("IID", "dx", "age", "age2", "sex", "batch", "study", "MatchOrder"), variable.name = "celltype", value.name = "CTP")
ctp_adjoligo.long$Level <- ifelse(ctp_adjoligo.long$celltype %in% c("Neuron", "Glial"), "Level1", "Level2")
ctp_adjoligo.long$MatchOrder <- match(ctp_adjoligo.long$dx, c("CTL", "ASD", "SCZ", "AZD"))
ctp_adjoligo.long$MatchCTP <- match(ctp_adjoligo.long$celltype, unique(c(ctp_adjoligo.long %>% filter(Level == "Level1") %>% pull(celltype) %>% as.character(), ctp_adjoligo.long %>% filter(Level == "Level2") %>% pull(celltype) %>% as.character())))
ctp_adjoligo.long$celltype <- as.character(ctp_adjoligo.long$celltype)
ctp_adjoligo.long$celltype[grep("NonN", ctp_adjoligo.long$celltype)] <- unlist(lapply(strsplit(ctp_adjoligo.long$celltype[grep("NonN", ctp_adjoligo.long$celltype)], "_"), function(x) x[2]))

ctp.clr.1e3.long <- melt(ctp.clr.1e3, id.vars = c("IID", "dx", "age", "age2", "sex", "batch", "study", "MatchOrder"), variable.name = "celltype", value.name = "CTP")
ctp.clr.1e3.long$Level <- ifelse(ctp.clr.1e3.long$celltype %in% c("Neuron", "Glial"), "Level1", "Level2")
ctp.clr.1e3.long$MatchOrder <- match(ctp.clr.1e3.long$dx, c("CTL", "ASD", "SCZ", "AZD"))
ctp.clr.1e3.long$MatchCTP <- match(ctp.clr.1e3.long$celltype, unique(c(ctp.clr.1e3.long %>% filter(Level == "Level1") %>% pull(celltype) %>% as.character(), ctp.clr.1e3.long %>% filter(Level == "Level2") %>% pull(celltype) %>% as.character())))
ctp.clr.1e3.long$celltype <- as.character(ctp.clr.1e3.long$celltype)
ctp.clr.1e3.long$celltype[grep("NonN", ctp.clr.1e3.long$celltype)] <- unlist(lapply(strsplit(ctp.clr.1e3.long$celltype[grep("NonN", ctp.clr.1e3.long$celltype)], "_"), function(x) x[2]))

ctp_adjoligo.clr.1e3.long <- melt(ctp_adjoligo.clr.1e3, id.vars = c("IID", "dx", "age", "age2", "sex", "batch", "study", "MatchOrder"), variable.name = "celltype", value.name = "CTP")
ctp_adjoligo.clr.1e3.long$Level <- ifelse(ctp_adjoligo.clr.1e3.long$celltype %in% c("Neuron", "Glial"), "Level1", "Level2")
ctp_adjoligo.clr.1e3.long$MatchOrder <- match(ctp_adjoligo.clr.1e3.long$dx, c("CTL", "ASD", "SCZ", "AZD"))
ctp_adjoligo.clr.1e3.long$MatchCTP <- match(ctp_adjoligo.clr.1e3.long$celltype, unique(c(ctp_adjoligo.clr.1e3.long %>% filter(Level == "Level1") %>% pull(celltype) %>% as.character(), ctp_adjoligo.clr.1e3.long %>% filter(Level == "Level2") %>% pull(celltype) %>% as.character())))
ctp_adjoligo.clr.1e3.long$celltype <- as.character(ctp_adjoligo.clr.1e3.long$celltype)
ctp_adjoligo.clr.1e3.long$celltype[grep("NonN", ctp_adjoligo.clr.1e3.long$celltype)] <- unlist(lapply(strsplit(ctp_adjoligo.clr.1e3.long$celltype[grep("NonN", ctp_adjoligo.clr.1e3.long$celltype)], "_"), function(x) x[2]))

2 Plots of differences

2.1 No clr, no adjustment for oligodendrocytes

ctp.long %>% filter(!IID %in% asd_exclude$IID) %>% filter(!IID %in% scz_exclude$IID) %>%
  mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = fct_rev(celltype), y = CTP, fill = dx)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(aes(colour = dx), position = position_jitterdodge(.1),alpha=.3) +
  coord_flip() + theme_bw() + xlab("") +
  scale_fill_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  facet_grid(Level ~ study, scales = "free_y", space = 'free_y')

ggsave(paste(out_dir, "/ctp_differences_bygroup_raw.png", sep = ""))
## Saving 7 x 5 in image
ggsave(paste(out_dir, "/ctp_differences_bygroup_raw.svg", sep = ""))
## Saving 7 x 5 in image

2.2 No clr, adjustment for oligodendrocytes

ctp_adjoligo.long %>% filter(!IID %in% asd_exclude$IID) %>% filter(!IID %in% scz_exclude$IID) %>%
  mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = fct_rev(celltype), y = CTP, fill = dx)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(aes(colour = dx), position = position_jitterdodge(.1),alpha=.3) +
  coord_flip() + theme_bw() + xlab("") +
  scale_fill_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  facet_grid(Level ~ study, scales = "free_y", space = 'free_y')

2.3 clr, no adjustment for oligodendrocytes

ctp.clr.1e3.long %>% filter(!IID %in% asd_exclude$IID) %>% filter(!IID %in% scz_exclude$IID) %>%
  mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = fct_rev(celltype), y = CTP, fill = dx)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(aes(colour = dx), position = position_jitterdodge(.1),alpha=.3) +
  coord_flip() + theme_bw() + xlab("") +
  scale_fill_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  facet_grid(Level ~ study, scales = "free_y", space = 'free_y')

ggsave(paste(out_dir, "/ctp_differences_bygroup_clr.png", sep = ""))
## Saving 7 x 5 in image
ggsave(paste(out_dir, "/ctp_differences_bygroup_clr.svg", sep = ""))
## Saving 7 x 5 in image

2.4 clr, adjustment for oligodendrocytes

ctp_adjoligo.clr.1e3.long %>% filter(!IID %in% asd_exclude$IID) %>% filter(!IID %in% scz_exclude$IID) %>%
  mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = fct_rev(celltype), y = CTP, fill = dx)) + 
  geom_boxplot(position = position_dodge(.8),alpha=.8,outlier.size = 0)+ 
  geom_point(aes(colour = dx), position = position_jitterdodge(.1),alpha=.3) +
  coord_flip() + theme_bw() + xlab("") +
  scale_fill_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  facet_grid(Level ~ study, scales = "free_y", space = 'free_y')

3 Age differences

  • include all participants (ie. irrespective of within-study age matching)

3.1 Plot distributions

ctp.long %>% mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  ggplot(aes(x = age, fill = dx)) +
  geom_density(alpha = 0.5) +
  scale_fill_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_wrap(~ study, scales = "free_y")
## Warning: Removed 9 rows containing non-finite values (stat_density).

ggsave(paste(out_dir, "/demographic_differences_age.pdf", sep = ""), height = 3, width = 9)
## Warning: Removed 9 rows containing non-finite values (stat_density).
ggsave(paste(out_dir, "/demographic_differences_age.svg", sep = ""), height = 3, width = 9)
## Warning: Removed 9 rows containing non-finite values (stat_density).
ctp.long %>% filter(!IID %in% asd_exclude$IID) %>% filter(!IID %in% scz_exclude$IID) %>%
  mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  ggplot(aes(x = age, fill = dx)) +
  geom_density(alpha = 0.5) +
  scale_fill_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_wrap(~ study, scales = "free_y")
## Warning: Removed 9 rows containing non-finite values (stat_density).

ggsave(paste(out_dir, "/demographic_differences_age_asdsczexclude.pdf", sep = ""), height = 3, width = 9)
## Warning: Removed 9 rows containing non-finite values (stat_density).
ggsave(paste(out_dir, "/demographic_differences_age_asdsczexclude.svg", sep = ""), height = 3, width = 9)
## Warning: Removed 9 rows containing non-finite values (stat_density).

3.2 CTPs by age

3.2.1 No clr, no adjustment for oligodendrocytes

ctp.long %>% mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = age, y = CTP, colour = dx)) +
  geom_point(alpha = 0.5) +
  geom_smooth() +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_grid(celltype ~ study, scales = "free_y")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 9 rows containing non-finite values (stat_smooth).
## Warning: Removed 9 rows containing missing values (geom_point).

Keep CTP % constant

ctp.long %>% mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = age, y = CTP, colour = dx)) +
  geom_point(alpha = 0.5) +
  geom_smooth() +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_grid(celltype ~ study, scales = "free_x")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 9 rows containing non-finite values (stat_smooth).
## Warning: Removed 9 rows containing missing values (geom_point).

Aggregate into 1 plot

ctp.long %>% mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = age, y = CTP)) +
  geom_point(alpha = 0.6, aes(colour = dx, shape = study)) +
  geom_smooth() +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_wrap(~ celltype, scales = "free_x", ncol = 1)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 9 rows containing non-finite values (stat_smooth).
## Warning: Removed 9 rows containing missing values (geom_point).

ggsave(paste(out_dir, "/ctp_differences_byage_raw.png", sep = ""), height = 10, width = 3)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 9 rows containing non-finite values (stat_smooth).

## Warning: Removed 9 rows containing missing values (geom_point).
ggsave(paste(out_dir, "/ctp_differences_byage_raw.svg", sep = ""), height = 10, width = 3)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 9 rows containing non-finite values (stat_smooth).

## Warning: Removed 9 rows containing missing values (geom_point).
ctp.long %>% mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = age, y = CTP)) +
  geom_point(alpha = 0.6, aes(colour = dx, shape = study)) +
  geom_smooth() +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_wrap(~ celltype, scales = "free_x", ncol = 3)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 9 rows containing non-finite values (stat_smooth).

## Warning: Removed 9 rows containing missing values (geom_point).

ggsave(paste(out_dir, "/ctp_differences_byage_raw_col3.png", sep = ""), height = 7, width = 7)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 9 rows containing non-finite values (stat_smooth).

## Warning: Removed 9 rows containing missing values (geom_point).
ggsave(paste(out_dir, "/ctp_differences_byage_raw_col3.svg", sep = ""), height = 7, width = 7)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 9 rows containing non-finite values (stat_smooth).

## Warning: Removed 9 rows containing missing values (geom_point).

3.2.2 No clr, adjustment for oligodendrocytes

ctp_adjoligo.long %>% mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = age, y = CTP, colour = dx)) +
  geom_point(alpha = 0.5) +
  geom_smooth() +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_grid(celltype ~ study, scales = "free")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 8 rows containing non-finite values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).

3.3 clr, no adjustment for oligodendrocytes

ctp.clr.1e3.long %>% mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = age, y = CTP, colour = dx)) +
  geom_point(alpha = 0.5) +
  geom_smooth() +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_grid(celltype ~ study, scales = "free")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).

Plots that regress out batch effect

#batch_coef <- ctp.clr.1e3.long %>% group_by(study, celltype) %>% 
batch_coef <- ctp.clr.1e3.long %>% group_by(celltype) %>% 
  do(model = tidy(lm(CTP ~ batch, data = .))) %>% unnest(model) %>%
  mutate(term = gsub("batch", "", term)) %>%
  dplyr::rename(batch = term) %>%
  dplyr::select(c("celltype", "batch", "estimate"))

# Modify CTP in light of batch adjustment
# - subtraction as it's relative to a given batch (eg. coefficient +1, means that batch has +1 unit higher on average --> need to push back down)
ctp.clr.1e3.long %>%
  #left_join(., batch_coef, by = c("study", "celltype", "batch")) %>% 
  left_join(., batch_coef, by = c("celltype", "batch")) %>% 
  mutate(CTP_batchadjust = ifelse(is.na(estimate), CTP, CTP - estimate)) %>% 
  mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = age, y = CTP_batchadjust, colour = dx)) +
  geom_point(alpha = 0.5) +
  geom_smooth() +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_grid(celltype ~ study)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).

Keep CTP % constant

ctp.clr.1e3.long %>% mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = age, y = CTP, colour = dx)) +
  geom_point(alpha = 0.5) +
  geom_smooth() +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_grid(celltype ~ study, scales = "free_x")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).

Aggregate into 1 plot

ctp.clr.1e3.long %>% mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = age, y = CTP)) +
  geom_point(alpha = 0.6, aes(colour = dx, shape = study)) +
  geom_smooth() +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  theme(legend.position = "none") +
  facet_wrap(~ celltype, ncol = 1)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).

ggsave(paste(out_dir, "/ctp_differences_byage_clr.png", sep = ""), height = 9, width = 3)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).

## Warning: Removed 7 rows containing missing values (geom_point).
ggsave(paste(out_dir, "/ctp_differences_byage_clr.svg", sep = ""), height = 9, width = 3)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).

## Warning: Removed 7 rows containing missing values (geom_point).
ctp.clr.1e3.long %>% mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = age, y = CTP)) +
  geom_point(alpha = 0.6, aes(colour = dx, shape = study)) +
  geom_smooth() +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  theme(legend.position = "none") +
  facet_wrap(~ celltype, ncol = 3)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).

## Warning: Removed 7 rows containing missing values (geom_point).

ggsave(paste(out_dir, "/ctp_differences_byage_clr_col3.png", sep = ""), height = 7, width = 7)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).

## Warning: Removed 7 rows containing missing values (geom_point).
ggsave(paste(out_dir, "/ctp_differences_byage_clr_col3.svg", sep = ""), height = 7, width = 7)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).

## Warning: Removed 7 rows containing missing values (geom_point).

Aggregate into 1 plot, regressing out batch effects

# Change cell-type names
ctp.clr.1e3.long$celltype <- as.character(ctp.clr.1e3.long$celltype)
#ctp.clr.1e3.long$celltype <- gsub("NonN_", "", ctp.clr.1e3.long$celltype)
#ctp.clr.1e3.long$celltype <- gsub("_FGF3R", "", ctp.clr.1e3.long$celltype)
#ctp.clr.1e3.long$celltype <- gsub("_TYROBP", "", ctp.clr.1e3.long$celltype)
#ctp.clr.1e3.long$celltype <- gsub("_MBP", "", ctp.clr.1e3.long$celltype)

batch_coef2 <- batch_coef
batch_coef2$celltype <- as.character(batch_coef2$celltype)
#batch_coef2$celltype <- gsub("NonN_", "", batch_coef2$celltype)
#batch_coef2$celltype <- gsub("_FGF3R", "", batch_coef2$celltype)
#batch_coef2$celltype <- gsub("_TYROBP", "", batch_coef2$celltype)
#batch_coef2$celltype <- gsub("_MBP", "", batch_coef2$celltype)

ctp.clr.1e3.long %>% 
#  left_join(., batch_coef2, by = c("study", "celltype", "batch")) %>% 
  left_join(., batch_coef2, by = c("celltype", "batch")) %>% 
  mutate(CTP_batchadjust = ifelse(is.na(estimate), CTP, CTP - estimate)) %>% 
  mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = age, y = CTP_batchadjust)) +
  geom_point(alpha = 0.6, aes(colour = dx, shape = study)) +
  geom_smooth() +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
#  theme(legend.position = "none") +
  facet_wrap(~ celltype)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).

#  facet_wrap(~ celltype, ncol = 1)

ggsave(paste(out_dir, "/ctp_differences_byage_clr_adjustbatch.png", sep = ""), height = 5, width = 6)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).

## Warning: Removed 7 rows containing missing values (geom_point).
ggsave(paste(out_dir, "/ctp_differences_byage_clr_adjustbatch.svg", sep = ""), height = 6, width = 6)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).

## Warning: Removed 7 rows containing missing values (geom_point).
ctp.clr.1e3.long %>% 
  #left_join(., batch_coef2, by = c("study", "celltype", "batch")) %>% 
  left_join(., batch_coef2, by = c("celltype", "batch")) %>% 
  mutate(CTP_batchadjust = ifelse(is.na(estimate), CTP, CTP - estimate)) %>% 
  mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = age, y = CTP_batchadjust)) +
  geom_point(alpha = 0.6, aes(colour = dx, shape = study)) +
  geom_smooth() +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  theme(legend.position = "none") +
  facet_wrap(~ celltype, ncol = 1)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).

ggsave(paste(out_dir, "/ctp_differences_byage_clr_adjustbatch_1col.png", sep = ""), height = 9, width = 2)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).

## Warning: Removed 7 rows containing missing values (geom_point).
ggsave(paste(out_dir, "/ctp_differences_byage_clr_adjustbatch_1col.svg", sep = ""), height = 9, width = 2)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 7 rows containing non-finite values (stat_smooth).

## Warning: Removed 7 rows containing missing values (geom_point).

3.4 clr, adjustment for oligodendrocytes

ctp_adjoligo.clr.1e3.long %>% mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = age, y = CTP, colour = dx)) +
  geom_point(alpha = 0.5) +
  geom_smooth() +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_grid(celltype ~ study, scales = "free")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing missing values (geom_point).

4 Sex differences

4.1 Plot distributions

ctp %>% mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  group_by(sex, dx, study) %>% dplyr::summarise(n = n()) %>%
  filter(!is.na(sex)) %>%
  ggplot(aes(x = sex, y = n, fill = dx)) +
  geom_col() +
  scale_fill_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_wrap(~ study, scales = "free_y")
## `summarise()` has grouped output by 'sex', 'dx'. You can override using the `.groups` argument.

ggsave(paste(out_dir, "/demographic_differences_sex.svg", sep = ""), height = 3, width = 9)

ctp %>% filter(!IID %in% asd_exclude$IID) %>% filter(!IID %in% scz_exclude$IID) %>%
  mutate(dx = fct_reorder(dx, MatchOrder)) %>% 
  group_by(sex, dx, study) %>% dplyr::summarise(n = n()) %>%
  filter(!is.na(sex)) %>%
  ggplot(aes(x = sex, y = n, fill = dx)) +
  geom_col() +
  scale_fill_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_wrap(~ study, scales = "free_y")
## `summarise()` has grouped output by 'sex', 'dx'. You can override using the `.groups` argument.

ggsave(paste(out_dir, "/demographic_differences_sex_asdsczexclude.svg", sep = ""), height = 3, width = 9)

4.2 No clr, no adjustment for oligodendrocytes

ctp.long %>% mutate(dx = fct_reorder(dx, MatchOrder)) %>% filter(!is.na(sex)) %>%
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = sex, y = CTP)) +
  geom_violin() +
  geom_point(aes(colour = dx)) +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_grid(celltype ~ study, scales = "free")

Keep CTP % constant

ctp.long %>% mutate(dx = fct_reorder(dx, MatchOrder)) %>% filter(!is.na(sex)) %>%
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = sex, y = CTP)) +
  geom_violin() +
  geom_point(aes(colour = dx)) +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_grid(celltype ~ study)

Aggregate into 1 plot

ctp.long %>% mutate(dx = fct_reorder(dx, MatchOrder)) %>% filter(!is.na(sex)) %>%
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = sex, y = CTP)) +
  geom_violin() +
  geom_boxplot(width=0.1, color="grey", alpha=0.2) +
  geom_point(aes(colour = dx), position = position_jitterdodge(.1),alpha=.3) +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_wrap(~ celltype, ncol = 1)

ggsave(paste(out_dir, "/ctp_differences_bysex_raw.png", sep = ""), height = 9, width = 3)
ggsave(paste(out_dir, "/ctp_differences_bysex_raw.svg", sep = ""), height = 9, width = 3)

4.3 No clr, adjustment for oligodendrocytes

ctp_adjoligo.long %>% mutate(dx = fct_reorder(dx, MatchOrder)) %>% filter(!is.na(sex)) %>%
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = sex, y = CTP)) +
  geom_violin() +
  geom_point(aes(colour = dx)) +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_grid(celltype ~ study, scales = "free")

4.4 clr, no adjustment for oligodendrocytes

ctp.clr.1e3.long %>% mutate(dx = fct_reorder(dx, MatchOrder)) %>% filter(!is.na(sex)) %>%
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = sex, y = CTP)) +
  geom_violin() +
  geom_point(aes(colour = dx)) +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_grid(celltype ~ study, scales = "free")

Keep CTP % constant

ctp.clr.1e3.long %>% mutate(dx = fct_reorder(dx, MatchOrder)) %>% filter(!is.na(sex)) %>%
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = sex, y = CTP)) +
  geom_violin() +
  geom_point(aes(colour = dx), position = position_jitterdodge(.1),alpha=.3) +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_grid(celltype ~ study)

Plots that regress out batch effect

ctp.clr.1e3.long %>% 
  #left_join(., batch_coef, by = c("study", "celltype", "batch")) %>% 
  left_join(., batch_coef, by = c("celltype", "batch")) %>% 
  mutate(CTP_batchadjust = ifelse(is.na(estimate), CTP, CTP - estimate)) %>% 
  mutate(dx = fct_reorder(dx, MatchOrder)) %>% filter(!is.na(sex)) %>%
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = sex, y = CTP_batchadjust)) +
  geom_violin() +
  geom_point(aes(colour = dx), position = position_jitterdodge(.1),alpha=.3) +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_grid(celltype ~ study, scales = "free")

Aggregate into 1 plot

ctp.clr.1e3.long %>% mutate(dx = fct_reorder(dx, MatchOrder)) %>% filter(!is.na(sex)) %>%
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = sex, y = CTP)) +
  geom_violin() +
  geom_boxplot(width=0.1, color="grey", alpha=0.2) +
  geom_point(aes(colour = dx), position = position_jitterdodge(.1),alpha=.3) +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  theme(legend.position = "none") +
  facet_wrap(~ celltype, ncol = 1)

ggsave(paste(out_dir, "/ctp_differences_bysex_clr.png", sep = ""), height = 9, width = 3)
ggsave(paste(out_dir, "/ctp_differences_bysex_clr.svg", sep = ""), height = 9, width = 3)

Aggregate into 1 plot, regressing out batch effects

ctp.clr.1e3.long %>% 
  #left_join(., batch_coef, by = c("study", "celltype", "batch")) %>% 
  left_join(., batch_coef, by = c("celltype", "batch")) %>% 
  mutate(CTP_batchadjust = ifelse(is.na(estimate), CTP, CTP - estimate)) %>% 
  mutate(dx = fct_reorder(dx, MatchOrder)) %>% filter(!is.na(sex)) %>%
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = sex, y = CTP_batchadjust)) +
  geom_violin() +
  geom_boxplot(width=0.1, color="grey", alpha=0.2) +
  geom_point(aes(colour = dx), position = position_jitterdodge(.1),alpha=.3) +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  theme(legend.position = "none") +
  facet_wrap(~ celltype, ncol = 1)

ggsave(paste(out_dir, "/ctp_differences_bysex_clr_adjustbatch.png", sep = ""), height = 9, width = 3)
ggsave(paste(out_dir, "/ctp_differences_bysex_clr_adjustbatch.svg", sep = ""), height = 9, width = 3)

4.5 clr, adjustment for oligodendrocytes

ctp_adjoligo.clr.1e3.long %>% mutate(dx = fct_reorder(dx, MatchOrder)) %>% filter(!is.na(sex)) %>%
  mutate(celltype = fct_reorder(celltype, MatchCTP)) %>% 
  ggplot(aes(x = sex, y = CTP)) +
  geom_violin() +
  geom_point(aes(colour = dx)) +
  scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
  theme_bw() +
  facet_grid(celltype ~ study, scales = "free")

6 Overall contribution of CTPs to diagnosis

  • obtain compositional PCs of CTP data
  • likelihood ratio test to test for difference in model with and without CTP PCs

6.1 CTP PCs for all data

ctp.tmp <- as.matrix(ctp %>% dplyr::select(c(level2_cols)))
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(level2_cols)` instead of `level2_cols` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
rownames(ctp.tmp) <- ctp$IID
ctp.tmp[which(ctp.tmp <= 0)] <- 1e-3
ctp.acomp <- acomp(ctp.tmp)
rownames(ctp.acomp) <- ctp$IID
mean(ctp.acomp)
##          Exc          Inh        Astro         Endo        Micro        Oligo 
## "0.21784685" "0.18036249" "0.16943513" "0.03938087" "0.03325559" "0.34647048" 
##          OPC 
## "0.01324857" 
## attr(,"class")
## [1] "acomp"
# Take supervised PCs, per individual
ctp.pcx <- princomp(ctp.acomp)
ctp.pcs <- ctp.pcx$scores

# Scree plot
screeplot(ctp.pcx, main = "Compositional PCA Scree Plot")

summary(ctp.pcx) # Take the PCs explaining 90-95% of variance
## Importance of components:
##                           Comp.1    Comp.2     Comp.3     Comp.4     Comp.5
## Standard deviation     0.6441217 0.3434477 0.23876253 0.12365993 0.11286514
## Proportion of Variance 0.6629784 0.1884884 0.09109526 0.02443551 0.02035556
## Cumulative Proportion  0.6629784 0.8514668 0.94256207 0.96699758 0.98735315
##                            Comp.6
## Standard deviation     0.08896302
## Proportion of Variance 0.01264685
## Cumulative Proportion  1.00000000
loadings(ctp.pcx)
## 
## Loadings:
##       Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## Exc   -0.536  0.388  0.301  0.535  0.192       
## Inh   -0.213               -0.602  0.438 -0.495
## Astro -0.310        -0.199 -0.421 -0.516  0.519
## Endo   0.121        -0.354  0.330 -0.518 -0.583
## Micro  0.303 -0.113 -0.583  0.222  0.482  0.362
## Oligo        -0.824  0.416                     
## OPC    0.680  0.375  0.478 -0.113              
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.143  0.143  0.143  0.143  0.143  0.143
## Cumulative Var  0.143  0.286  0.429  0.571  0.714  0.857
loadings.ma <- loadings(ctp.pcx)
class(loadings.ma) <- "matrix"
loadings.long <- melt(data.frame(celltype = rownames(loadings.ma), loadings.ma), variable.name = "CTP_PC", value.name = "Loading")
## Using celltype as id variables
analysis_order <- c(level2_cols)
loadings.long$MatchAnalysis <- match(loadings.long$celltype, analysis_order)
loadings.long %>% mutate(celltype = fct_reorder(celltype, MatchAnalysis)) %>% 
  ggplot(aes(x = CTP_PC, y = Loading, fill = celltype)) + 
  geom_bar(stat="identity", position=position_dodge())

ggsave(paste(out_dir, "/ctp_noclr_pc_ROSMAP_ASDbrain_LIBD_loadings.svg", sep = ""))
## Saving 7 x 5 in image
ggsave(paste(out_dir, "/ctp_noclr_pc_ROSMAP_ASDbrain_LIBD_loadings.png", sep = ""))
## Saving 7 x 5 in image
# Take the PCs explaining 90-95% of variance
# - Check the % of variance explained by the PCs
# - from here, take the number of PCs that explain 
foo <- ((ctp.pcx$sdev)^2)/((sum((ctp.pcx$sdev)^2)))*100
var_exp <- data.frame(Var_pc = foo, Cumulative_Var_pc = cumsum(foo))
print(var_exp)
##           Var_pc Cumulative_Var_pc
## Comp.1 66.297838          66.29784
## Comp.2 18.848843          85.14668
## Comp.3  9.109526          94.25621
## Comp.4  2.443551          96.69976
## Comp.5  2.035556          98.73531
## Comp.6  1.264685         100.00000
# Choose the number of PCs to include as covariates
# - eg. in my dataset it's the first 3 PCs
pcs_keep <- rownames(var_exp)
#pcs_keep <- rownames(var_exp)[1:which(var_exp$Cumulative_Var_pc >= 95)[1]]

# Join into pheno data frame
ctp_pc <- inner_join(ctp, data.frame(IID = rownames(ctp.pcx$scores), ctp.pcx$scores[,pcs_keep]), by = "IID", type = "inner")

# Write tables for GWAS 
saveRDS(ctp_pc, paste(out_dir, "/ctp_noclr_pc_ROSMAP_ASDbrain_LIBD.rds", sep = ""))

# Look for relationships
summary(lm(cbind(Comp.1, Comp.2, Comp.3, Comp.4, Comp.5, Comp.6) ~ age + age2 + sex + dx + batch, data = ctp_pc))
## Response Comp.1 :
## 
## Call:
## lm(formula = Comp.1 ~ age + age2 + sex + dx + batch, data = ctp_pc)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2240 -0.1939 -0.0024  0.2037  1.8578 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.560e+00  7.952e-02  19.616  < 2e-16 ***
## age             -6.567e-03  2.393e-03  -2.744  0.00615 ** 
## age2             6.508e-05  2.645e-05   2.460  0.01403 *  
## sexM             3.025e-02  2.379e-02   1.272  0.20361    
## dxAZD           -1.242e-01  1.025e-01  -1.211  0.22597    
## dxCTL           -7.200e-02  9.767e-02  -0.737  0.46117    
## dxSCZ           -4.039e-02  1.093e-01  -0.370  0.71178    
## batchASDbrain2   1.077e-01  1.548e-01   0.696  0.48659    
## batchLieber_104 -1.095e+00  9.003e-02 -12.161  < 2e-16 ***
## batchLieber_244 -1.052e+00  7.666e-02 -13.724  < 2e-16 ***
## batchLieber_289 -1.115e+00  8.011e-02 -13.924  < 2e-16 ***
## batchLieber_30  -1.028e+00  1.118e-01  -9.192  < 2e-16 ***
## batchLieber_315 -1.037e+00  8.504e-02 -12.192  < 2e-16 ***
## batchROSMAP0    -2.028e+00  9.065e-02 -22.370  < 2e-16 ***
## batchROSMAP1    -1.601e+00  8.952e-02 -17.881  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3938 on 1254 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.6305, Adjusted R-squared:  0.6264 
## F-statistic: 152.8 on 14 and 1254 DF,  p-value: < 2.2e-16
## 
## 
## Response Comp.2 :
## 
## Call:
## lm(formula = Comp.2 ~ age + age2 + sex + dx + batch, data = ctp_pc)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.72839 -0.12905  0.01993  0.16720  0.85132 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      7.166e-01  5.378e-02  13.325  < 2e-16 ***
## age             -1.060e-02  1.618e-03  -6.553 8.21e-11 ***
## age2             9.292e-05  1.789e-05   5.194 2.40e-07 ***
## sexM            -1.895e-02  1.609e-02  -1.178   0.2391    
## dxAZD            7.023e-02  6.933e-02   1.013   0.3112    
## dxCTL            5.643e-02  6.605e-02   0.854   0.3931    
## dxSCZ            1.857e-01  7.391e-02   2.513   0.0121 *  
## batchASDbrain2  -4.451e-02  1.047e-01  -0.425   0.6707    
## batchLieber_104 -8.218e-01  6.088e-02 -13.498  < 2e-16 ***
## batchLieber_244 -8.369e-01  5.184e-02 -16.144  < 2e-16 ***
## batchLieber_289 -7.763e-01  5.417e-02 -14.329  < 2e-16 ***
## batchLieber_30  -9.118e-01  7.563e-02 -12.057  < 2e-16 ***
## batchLieber_315 -5.570e-01  5.751e-02  -9.685  < 2e-16 ***
## batchROSMAP0    -5.765e-01  6.131e-02  -9.403  < 2e-16 ***
## batchROSMAP1    -3.959e-01  6.054e-02  -6.540 8.93e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2663 on 1254 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.4047, Adjusted R-squared:  0.3981 
## F-statistic:  60.9 on 14 and 1254 DF,  p-value: < 2.2e-16
## 
## 
## Response Comp.3 :
## 
## Call:
## lm(formula = Comp.3 ~ age + age2 + sex + dx + batch, data = ctp_pc)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.96598 -0.11169  0.00549  0.11488  0.77336 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -4.360e-01  3.847e-02 -11.334  < 2e-16 ***
## age              1.310e-02  1.158e-03  11.316  < 2e-16 ***
## age2            -6.301e-05  1.280e-05  -4.923 9.66e-07 ***
## sexM            -5.460e-03  1.151e-02  -0.474    0.635    
## dxAZD           -2.040e-02  4.960e-02  -0.411    0.681    
## dxCTL           -5.490e-02  4.725e-02  -1.162    0.245    
## dxSCZ           -5.229e-02  5.287e-02  -0.989    0.323    
## batchASDbrain2  -7.760e-02  7.487e-02  -1.036    0.300    
## batchLieber_104  4.069e-02  4.355e-02   0.934    0.350    
## batchLieber_244  1.993e-02  3.709e-02   0.538    0.591    
## batchLieber_289 -3.301e-02  3.875e-02  -0.852    0.395    
## batchLieber_30   5.906e-02  5.410e-02   1.092    0.275    
## batchLieber_315 -1.804e-02  4.114e-02  -0.439    0.661    
## batchROSMAP0    -2.233e-01  4.386e-02  -5.092 4.09e-07 ***
## batchROSMAP1    -5.531e-02  4.331e-02  -1.277    0.202    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1905 on 1254 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.3708, Adjusted R-squared:  0.3637 
## F-statistic: 52.78 on 14 and 1254 DF,  p-value: < 2.2e-16
## 
## 
## Response Comp.4 :
## 
## Call:
## lm(formula = Comp.4 ~ age + age2 + sex + dx + batch, data = ctp_pc)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.73945 -0.04626  0.00912  0.06163  0.48182 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      8.348e-02  2.201e-02   3.793 0.000156 ***
## age              5.255e-03  6.624e-04   7.934 4.68e-15 ***
## age2            -4.900e-05  7.323e-06  -6.692 3.31e-11 ***
## sexM            -2.968e-02  6.584e-03  -4.508 7.17e-06 ***
## dxAZD            1.183e-03  2.838e-02   0.042 0.966747    
## dxCTL           -8.493e-05  2.704e-02  -0.003 0.997494    
## dxSCZ           -1.998e-02  3.025e-02  -0.660 0.509058    
## batchASDbrain2  -7.113e-02  4.284e-02  -1.660 0.097069 .  
## batchLieber_104 -1.487e-01  2.492e-02  -5.966 3.17e-09 ***
## batchLieber_244 -1.516e-01  2.122e-02  -7.146 1.51e-12 ***
## batchLieber_289 -1.272e-01  2.217e-02  -5.736 1.21e-08 ***
## batchLieber_30  -1.570e-01  3.096e-02  -5.070 4.57e-07 ***
## batchLieber_315 -1.623e-01  2.354e-02  -6.896 8.46e-12 ***
## batchROSMAP0    -1.597e-01  2.509e-02  -6.366 2.72e-10 ***
## batchROSMAP1    -2.114e-01  2.478e-02  -8.530  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.109 on 1254 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.232,  Adjusted R-squared:  0.2235 
## F-statistic: 27.06 on 14 and 1254 DF,  p-value: < 2.2e-16
## 
## 
## Response Comp.5 :
## 
## Call:
## lm(formula = Comp.5 ~ age + age2 + sex + dx + batch, data = ctp_pc)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.47159 -0.05928 -0.00175  0.06252  0.40750 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      7.702e-02  2.072e-02   3.717 0.000211 ***
## age             -4.567e-03  6.236e-04  -7.324 4.30e-13 ***
## age2             2.924e-05  6.894e-06   4.241 2.39e-05 ***
## sexM             2.647e-02  6.198e-03   4.271 2.09e-05 ***
## dxAZD            2.313e-02  2.671e-02   0.866 0.386724    
## dxCTL           -1.308e-02  2.545e-02  -0.514 0.607310    
## dxSCZ           -4.283e-02  2.848e-02  -1.504 0.132887    
## batchASDbrain2  -3.720e-02  4.033e-02  -0.922 0.356495    
## batchLieber_104  4.350e-02  2.346e-02   1.854 0.063946 .  
## batchLieber_244  6.134e-02  1.998e-02   3.071 0.002181 ** 
## batchLieber_289  3.138e-02  2.088e-02   1.503 0.133080    
## batchLieber_30   7.585e-02  2.914e-02   2.603 0.009361 ** 
## batchLieber_315 -5.230e-03  2.216e-02  -0.236 0.813460    
## batchROSMAP0     7.965e-02  2.362e-02   3.372 0.000770 ***
## batchROSMAP1     1.106e-01  2.333e-02   4.740 2.38e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1026 on 1254 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.183,  Adjusted R-squared:  0.1738 
## F-statistic: 20.06 on 14 and 1254 DF,  p-value: < 2.2e-16
## 
## 
## Response Comp.6 :
## 
## Call:
## lm(formula = Comp.6 ~ age + age2 + sex + dx + batch, data = ctp_pc)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37172 -0.05030  0.00504  0.05521  0.31257 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -3.421e-02  1.719e-02  -1.990 0.046861 *  
## age              1.968e-03  5.174e-04   3.803 0.000150 ***
## age2            -1.472e-05  5.720e-06  -2.573 0.010205 *  
## sexM             1.851e-02  5.143e-03   3.598 0.000333 ***
## dxAZD            1.023e-02  2.217e-02   0.462 0.644420    
## dxCTL            4.804e-03  2.112e-02   0.227 0.820117    
## dxSCZ           -5.329e-03  2.363e-02  -0.226 0.821601    
## batchASDbrain2   1.379e-02  3.346e-02   0.412 0.680299    
## batchLieber_104 -4.633e-02  1.947e-02  -2.380 0.017461 *  
## batchLieber_244 -5.134e-02  1.658e-02  -3.097 0.001998 ** 
## batchLieber_289 -7.197e-02  1.732e-02  -4.155 3.47e-05 ***
## batchLieber_30  -1.728e-02  2.418e-02  -0.714 0.475058    
## batchLieber_315 -1.201e-02  1.839e-02  -0.653 0.513825    
## batchROSMAP0    -7.058e-03  1.960e-02  -0.360 0.718864    
## batchROSMAP1    -4.390e-02  1.936e-02  -2.268 0.023491 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08516 on 1254 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.09459,    Adjusted R-squared:  0.08448 
## F-statistic: 9.358 on 14 and 1254 DF,  p-value: < 2.2e-16

6.2 ASDbrain

studyn <- "ASDbrain"

# Make compositional object
tmp <- ctp %>% filter(study == all_of(studyn)) %>% filter(!IID %in% asd_exclude$IID)
ctp.tmp <- as.matrix(tmp %>% dplyr::select(c(level2_cols)))
ctp.tmp[which(ctp.tmp <= 0)] <- 1e-3
ctp.acomp <- acomp(ctp.tmp)
rownames(ctp.acomp) <- tmp$IID
mean(ctp.acomp)
##          Exc          Inh        Astro         Endo        Micro        Oligo 
## "0.17507036" "0.16696920" "0.15284102" "0.06974837" "0.07261286" "0.30735471" 
##          OPC 
## "0.05540349" 
## attr(,"class")
## [1] "acomp"
# Take supervised PCs, per individual
ctp.pcx <- princomp(ctp.acomp)
ctp.pcs <- ctp.pcx$scores

# Scree plot
screeplot(ctp.pcx, main = "Compositional PCA Scree Plot")

summary(ctp.pcx) # Take the PCs explaining 90-95% of variance
## Importance of components:
##                           Comp.1    Comp.2     Comp.3     Comp.4     Comp.5
## Standard deviation     0.4370104 0.1877183 0.10987993 0.07224782 0.05279493
## Proportion of Variance 0.7723983 0.1425184 0.04883088 0.02111093 0.01127307
## Cumulative Proportion  0.7723983 0.9149167 0.96374759 0.98485852 0.99613159
##                            Comp.6
## Standard deviation     0.03092697
## Proportion of Variance 0.00386841
## Cumulative Proportion  1.00000000
loadings(ctp.pcx)
## 
## Loadings:
##       Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## Exc   -0.484  0.683 -0.295  0.222  0.143       
## Inh   -0.215 -0.212 -0.334 -0.487 -0.646       
## Astro -0.330         0.746 -0.352  0.228  0.119
## Endo         -0.264  0.174  0.487 -0.149 -0.705
## Micro        -0.433 -0.450 -0.132  0.664       
## Oligo  0.753  0.448        -0.258        -0.125
## OPC    0.186 -0.167         0.518 -0.217  0.687
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.143  0.143  0.143  0.143  0.143  0.143
## Cumulative Var  0.143  0.286  0.429  0.571  0.714  0.857
loadings.ma <- loadings(ctp.pcx)
class(loadings.ma) <- "matrix"
loadings.long <- melt(data.frame(celltype = rownames(loadings.ma), loadings.ma), variable.name = "CTP_PC", value.name = "Loading")
## Using celltype as id variables
analysis_order <- c(level2_cols)
loadings.long$MatchAnalysis <- match(loadings.long$celltype, analysis_order)
loadings.long %>% mutate(celltype = fct_reorder(celltype, MatchAnalysis)) %>% 
  ggplot(aes(x = CTP_PC, y = Loading, fill = celltype)) + 
  geom_bar(stat="identity", position=position_dodge())

ggsave(paste(out_dir, "/ctp_noclr_pc_ASDbrain_loadings.svg", sep = ""))
## Saving 7 x 5 in image
ggsave(paste(out_dir, "/ctp_noclr_pc_ASDbrain_loadings.png", sep = ""))
## Saving 7 x 5 in image
# Take the PCs explaining 90-95% of variance
# - Check the % of variance explained by the PCs
# - from here, take the number of PCs that explain 
foo <- ((ctp.pcx$sdev)^2)/((sum((ctp.pcx$sdev)^2)))*100
var_exp <- data.frame(Var_pc = foo, Cumulative_Var_pc = cumsum(foo))
print(var_exp)
##           Var_pc Cumulative_Var_pc
## Comp.1 77.239830          77.23983
## Comp.2 14.251841          91.49167
## Comp.3  4.883088          96.37476
## Comp.4  2.111093          98.48585
## Comp.5  1.127307          99.61316
## Comp.6  0.386841         100.00000
# Choose the number of PCs to include as covariates
# - eg. in my dataset it's the first 3 PCs
pcs_keep <- rownames(var_exp)[1:which(var_exp$Cumulative_Var_pc >= 95)[1]]

# Join into pheno data frame
ctp_pc <- inner_join(ctp, data.frame(IID = rownames(ctp.pcx$scores), ctp.pcx$scores[,pcs_keep]), by = "IID", type = "inner")
ctp_pc <- ctp_pc %>% mutate(MatchDx = match(dx, c("CTL", "ASD"))) %>% mutate(dx = fct_reorder(dx, MatchDx)) %>% dplyr::select(-c("MatchDx"))

# Run model
mod0 <- lrm(dx ~ age + sex + batch, x = TRUE, y = TRUE, data = ctp_pc %>% filter(study == all_of(studyn)) %>% filter(!IID %in% asd_exclude$IID))
mod1 <- lrm(dx ~ age + sex + batch + Comp.1 + Comp.2 + Comp.3, x = TRUE, y = TRUE, data = ctp_pc %>% filter(study == all_of(studyn)) %>% filter(!IID %in% asd_exclude$IID))

mod0
## Logistic Regression Model
##  
##  lrm(formula = dx ~ age + sex + batch, data = ctp_pc %>% filter(study == 
##      all_of(studyn)) %>% filter(!IID %in% asd_exclude$IID), x = TRUE, 
##      y = TRUE)
##  
##                       Model Likelihood    Discrimination    Rank Discrim.    
##                             Ratio Test           Indexes          Indexes    
##  Obs           58    LR chi2     13.22    R2       0.272    C       0.722    
##   CTL          27    d.f.            3    g        2.318    Dxy     0.444    
##   ASD          31    Pr(> chi2) 0.0042    gr      10.155    gamma   0.449    
##  max |deriv| 0.09                         gp       0.233    tau-a   0.225    
##                                           Brier    0.204                     
##  
##                  Coef    S.E.    Wald Z Pr(>|Z|)
##  Intercept        1.0364  0.9636  1.08  0.2821  
##  age             -0.0336  0.0188 -1.79  0.0729  
##  sex=M            0.1721  0.6890  0.25  0.8027  
##  batch=ASDbrain2  8.4080 22.4842  0.37  0.7084  
## 
mod1
## Logistic Regression Model
##  
##  lrm(formula = dx ~ age + sex + batch + Comp.1 + Comp.2 + Comp.3, 
##      data = ctp_pc %>% filter(study == all_of(studyn)) %>% filter(!IID %in% 
##          asd_exclude$IID), x = TRUE, y = TRUE)
##  
##                      Model Likelihood    Discrimination    Rank Discrim.    
##                            Ratio Test           Indexes          Indexes    
##  Obs          58    LR chi2     19.49    R2       0.381    C       0.806    
##   CTL         27    d.f.            6    g        2.804    Dxy     0.613    
##   ASD         31    Pr(> chi2) 0.0034    gr      16.507    gamma   0.613    
##  max |deriv| 0.1                         gp       0.306    tau-a   0.310    
##                                          Brier    0.181                     
##  
##                  Coef    S.E.    Wald Z Pr(>|Z|)
##  Intercept        0.8974  1.0929  0.82  0.4116  
##  age             -0.0204  0.0217 -0.94  0.3486  
##  sex=M           -0.2455  0.7444 -0.33  0.7416  
##  batch=ASDbrain2  8.3995 20.9311  0.40  0.6882  
##  Comp.1           0.4408  0.7729  0.57  0.5685  
##  Comp.2          -4.6111  1.9832 -2.33  0.0201  
##  Comp.3          -1.7074  3.1372 -0.54  0.5863  
## 
lrtest(mod0, mod1)
## Likelihood ratio test
## 
## Model 1: dx ~ age + sex + batch
## Model 2: dx ~ age + sex + batch + Comp.1 + Comp.2 + Comp.3
##   #Df  LogLik Df  Chisq Pr(>Chisq)  
## 1   4 -33.457                       
## 2   7 -30.319  3 6.2763    0.09891 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modx <- lrm(dx ~ Comp.1 + Comp.2 + Comp.3, x = TRUE, y = TRUE, data = ctp_pc %>% filter(study == all_of(studyn)) %>% filter(!IID %in% asd_exclude$IID))
modx
## Logistic Regression Model
##  
##  lrm(formula = dx ~ Comp.1 + Comp.2 + Comp.3, data = ctp_pc %>% 
##      filter(study == all_of(studyn)) %>% filter(!IID %in% asd_exclude$IID), 
##      x = TRUE, y = TRUE)
##  
##                        Model Likelihood    Discrimination    Rank Discrim.    
##                              Ratio Test           Indexes          Indexes    
##  Obs            58    LR chi2      9.61    R2       0.204    C       0.704    
##   CTL           27    d.f.            3    g        1.062    Dxy     0.407    
##   ASD           31    Pr(> chi2) 0.0222    gr       2.893    gamma   0.407    
##  max |deriv| 2e-08                         gp       0.227    tau-a   0.206    
##                                            Brier    0.213                     
##  
##            Coef    S.E.   Wald Z Pr(>|Z|)
##  Intercept  0.1949 0.2902  0.67  0.5018  
##  Comp.1     0.4185 0.7071  0.59  0.5540  
##  Comp.2    -4.8998 1.8049 -2.71  0.0066  
##  Comp.3    -0.7211 2.6864 -0.27  0.7884  
## 
saveRDS(mod0, paste(out_dir, "/ASD_methylation_brain_ctp_mod0.rds", sep = ""))
saveRDS(mod1, paste(out_dir, "/ASD_methylation_brain_ctp_mod1.rds", sep = ""))

6.3 LIBD

studyn <- "LIBD"

# Make compositional object
tmp <- ctp %>% filter(study == all_of(studyn)) %>% filter(!IID %in% scz_exclude$IID)
ctp.tmp <- as.matrix(tmp %>% dplyr::select(c(level2_cols)))
ctp.tmp[which(ctp.tmp <= 0)] <- 1e-3
ctp.acomp <- acomp(ctp.tmp)
rownames(ctp.acomp) <- tmp$IID
mean(ctp.acomp)
##          Exc          Inh        Astro         Endo        Micro        Oligo 
## "0.17190648" "0.16256977" "0.15258062" "0.04341379" "0.03789097" "0.41669609" 
##          OPC 
## "0.01494227" 
## attr(,"class")
## [1] "acomp"
# Take supervised PCs, per individual
ctp.pcx <- princomp(ctp.acomp)
ctp.pcs <- ctp.pcx$scores

# Scree plot
screeplot(ctp.pcx, main = "Compositional PCA Scree Plot")

summary(ctp.pcx) # Take the PCs explaining 90-95% of variance
## Importance of components:
##                          Comp.1    Comp.2     Comp.3     Comp.4     Comp.5
## Standard deviation     0.474716 0.2368346 0.17049997 0.10769750 0.07567870
## Proportion of Variance 0.679646 0.1691629 0.08767256 0.03498052 0.01727278
## Cumulative Proportion  0.679646 0.8488090 0.93648152 0.97146204 0.98873482
##                            Comp.6
## Standard deviation     0.06111693
## Proportion of Variance 0.01126518
## Cumulative Proportion  1.00000000
loadings(ctp.pcx)
## 
## Loadings:
##       Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## Exc   -0.703         0.473 -0.328         0.170
## Inh   -0.121        -0.213        -0.317 -0.834
## Astro -0.268        -0.300  0.568  0.607       
## Endo          0.188 -0.268  0.274 -0.666  0.480
## Micro  0.239        -0.482 -0.681  0.260  0.187
## Oligo  0.380 -0.774  0.286  0.171              
## OPC    0.466  0.599  0.505         0.139       
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.143  0.143  0.143  0.143  0.143  0.143
## Cumulative Var  0.143  0.286  0.429  0.571  0.714  0.857
loadings.ma <- loadings(ctp.pcx)
class(loadings.ma) <- "matrix"
loadings.long <- melt(data.frame(celltype = rownames(loadings.ma), loadings.ma), variable.name = "CTP_PC", value.name = "Loading")
## Using celltype as id variables
analysis_order <- c(level2_cols)
loadings.long$MatchAnalysis <- match(loadings.long$celltype, analysis_order)
loadings.long %>% mutate(celltype = fct_reorder(celltype, MatchAnalysis)) %>% 
  ggplot(aes(x = CTP_PC, y = Loading, fill = celltype)) + 
  geom_bar(stat="identity", position=position_dodge())

ggsave(paste(out_dir, "/ctp_noclr_pc_LIBD_loadings.svg", sep = ""))
## Saving 7 x 5 in image
ggsave(paste(out_dir, "/ctp_noclr_pc_LIBD_loadings.png", sep = ""))
## Saving 7 x 5 in image
# Take the PCs explaining 90-95% of variance
# - Check the % of variance explained by the PCs
# - from here, take the number of PCs that explain 
foo <- ((ctp.pcx$sdev)^2)/((sum((ctp.pcx$sdev)^2)))*100
var_exp <- data.frame(Var_pc = foo, Cumulative_Var_pc = cumsum(foo))
print(var_exp)
##           Var_pc Cumulative_Var_pc
## Comp.1 67.964602          67.96460
## Comp.2 16.916294          84.88090
## Comp.3  8.767256          93.64815
## Comp.4  3.498052          97.14620
## Comp.5  1.727278          98.87348
## Comp.6  1.126518         100.00000
# Choose the number of PCs to include as covariates
# - eg. in my dataset it's the first 3 PCs
pcs_keep <- rownames(var_exp)[1:which(var_exp$Cumulative_Var_pc >= 95)[1]]

# Join into pheno data frame
ctp_pc <- inner_join(ctp, data.frame(IID = rownames(ctp.pcx$scores), ctp.pcx$scores[,pcs_keep]), by = "IID", type = "inner")
ctp_pc <- ctp_pc %>% mutate(MatchDx = match(dx, c("CTL", "SCZ"))) %>% mutate(dx = fct_reorder(dx, MatchDx)) %>% dplyr::select(-c("MatchDx"))

# Write table
saveRDS(ctp_pc, paste(out_dir, "/ctp_noclr_pc95_", studyn, ".rds", sep = ""))

# Run model
mod0 <- lrm(dx ~ age + sex + batch, x = TRUE, y = TRUE, data = ctp_pc %>% filter(study == all_of(studyn)) %>% filter(!IID %in% scz_exclude$IID))
mod1 <- lrm(dx ~ age + sex + batch + Comp.1 + Comp.2 + Comp.3 + Comp.4, x = TRUE, y = TRUE, data = ctp_pc %>% filter(study == all_of(studyn)) %>% filter(!IID %in% scz_exclude$IID))

mod0
## Logistic Regression Model
##  
##  lrm(formula = dx ~ age + sex + batch, data = ctp_pc %>% filter(study == 
##      all_of(studyn)) %>% filter(!IID %in% scz_exclude$IID), x = TRUE, 
##      y = TRUE)
##  
##                        Model Likelihood    Discrimination    Rank Discrim.    
##                              Ratio Test           Indexes          Indexes    
##  Obs          402    LR chi2     112.53    R2       0.326    C       0.755    
##   CTL         217    d.f.             6    g        3.358    Dxy     0.511    
##   SCZ         185    Pr(> chi2) <0.0001    gr      28.740    gamma   0.511    
##  max |deriv| 0.04                          gp       0.258    tau-a   0.254    
##                                            Brier    0.194                     
##  
##                   Coef     S.E.    Wald Z Pr(>|Z|)
##  Intercept        -12.1495 27.5881 -0.44  0.6597  
##  age                0.0389  0.0084  4.63  <0.0001 
##  sex=M             -0.5285  0.2443 -2.16  0.0305  
##  batch=Lieber_244  10.6725 27.5840  0.39  0.6988  
##  batch=Lieber_289  10.3718 27.5840  0.38  0.7069  
##  batch=Lieber_30   21.0306 41.8385  0.50  0.6152  
##  batch=Lieber_315  10.4588 27.5845  0.38  0.7046  
## 
mod1
## Logistic Regression Model
##  
##  lrm(formula = dx ~ age + sex + batch + Comp.1 + Comp.2 + Comp.3 + 
##      Comp.4, data = ctp_pc %>% filter(study == all_of(studyn)) %>% 
##      filter(!IID %in% scz_exclude$IID), x = TRUE, y = TRUE)
##  
##                        Model Likelihood    Discrimination    Rank Discrim.    
##                              Ratio Test           Indexes          Indexes    
##  Obs          402    LR chi2     136.35    R2       0.384    C       0.800    
##   CTL         217    d.f.            10    g        3.600    Dxy     0.600    
##   SCZ         185    Pr(> chi2) <0.0001    gr      36.596    gamma   0.600    
##  max |deriv| 0.04                          gp       0.297    tau-a   0.299    
##                                            Brier    0.179                     
##  
##                   Coef     S.E.    Wald Z Pr(>|Z|)
##  Intercept        -11.6673 27.0907 -0.43  0.6667  
##  age                0.0295  0.0110  2.68  0.0073  
##  sex=M             -0.4127  0.2603 -1.59  0.1128  
##  batch=Lieber_244  10.6149 27.0855  0.39  0.6951  
##  batch=Lieber_289  10.3106 27.0855  0.38  0.7034  
##  batch=Lieber_30   21.0400 41.1842  0.51  0.6094  
##  batch=Lieber_315   9.8542 27.0862  0.36  0.7160  
##  Comp.1            -0.2531  0.2578 -0.98  0.3263  
##  Comp.2             2.3650  0.5585  4.23  <0.0001 
##  Comp.3             1.1899  0.8619  1.38  0.1674  
##  Comp.4             2.0001  1.3121  1.52  0.1274  
## 
lrtest(mod0, mod1)
## Likelihood ratio test
## 
## Model 1: dx ~ age + sex + batch
## Model 2: dx ~ age + sex + batch + Comp.1 + Comp.2 + Comp.3 + Comp.4
##   #Df  LogLik Df  Chisq Pr(>Chisq)    
## 1   7 -221.11                         
## 2  11 -209.19  4 23.826  8.654e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modx <- lrm(dx ~ Comp.1 + Comp.2 + Comp.3 + Comp.4, x = TRUE, y = TRUE, data = ctp_pc %>% filter(study == all_of(studyn)) %>% filter(!IID %in% scz_exclude$IID))
modx
## Logistic Regression Model
##  
##  lrm(formula = dx ~ Comp.1 + Comp.2 + Comp.3 + Comp.4, data = ctp_pc %>% 
##      filter(study == all_of(studyn)) %>% filter(!IID %in% scz_exclude$IID), 
##      x = TRUE, y = TRUE)
##  
##                         Model Likelihood    Discrimination    Rank Discrim.    
##                               Ratio Test           Indexes          Indexes    
##  Obs           402    LR chi2      37.10    R2       0.118    C       0.672    
##   CTL          217    d.f.             4    g        0.736    Dxy     0.345    
##   SCZ          185    Pr(> chi2) <0.0001    gr       2.088    gamma   0.345    
##  max |deriv| 3e-11                          gp       0.167    tau-a   0.172    
##                                             Brier    0.226                     
##  
##            Coef    S.E.   Wald Z Pr(>|Z|)
##  Intercept -0.1733 0.1050 -1.65  0.0989  
##  Comp.1    -0.0861 0.2253 -0.38  0.7024  
##  Comp.2     2.1638 0.4877  4.44  <0.0001 
##  Comp.3     1.7716 0.6363  2.78  0.0054  
##  Comp.4     2.7102 1.0117  2.68  0.0074  
## 
saveRDS(mod0, paste(out_dir, "/LIBD_ctp_mod0.rds", sep = ""))
saveRDS(mod1, paste(out_dir, "/LIBD_ctp_mod1.rds", sep = ""))

6.4 ROSMAP

studyn <- "ROSMAP"

# Make compositional object
tmp <- ctp %>% filter(study == all_of(studyn))
ctp.tmp <- as.matrix(tmp %>% dplyr::select(c(level2_cols)))
ctp.tmp[which(ctp.tmp <= 0)] <- 1e-3
ctp.acomp <- acomp(ctp.tmp)
rownames(ctp.acomp) <- tmp$IID
mean(ctp.acomp)
##          Exc          Inh        Astro         Endo        Micro        Oligo 
## "0.25556872" "0.18533533" "0.17725115" "0.03342661" "0.02644621" "0.31168476" 
##          OPC 
## "0.01028722" 
## attr(,"class")
## [1] "acomp"
# Take supervised PCs, per individual
ctp.pcx <- princomp(ctp.acomp)
ctp.pcs <- ctp.pcx$scores

# Scree plot
screeplot(ctp.pcx, main = "Compositional PCA Scree Plot")

summary(ctp.pcx) # Take the PCs explaining 90-95% of variance
## Importance of components:
##                           Comp.1    Comp.2    Comp.3     Comp.4     Comp.5
## Standard deviation     0.4794320 0.2177051 0.1910892 0.11050329 0.09158798
## Proportion of Variance 0.6747181 0.1391251 0.1071866 0.03584419 0.02462324
## Cumulative Proportion  0.6747181 0.8138432 0.9210298 0.95687401 0.98149725
##                            Comp.6
## Standard deviation     0.07939332
## Proportion of Variance 0.01850275
## Cumulative Proportion  1.00000000
loadings(ctp.pcx)
## 
## Loadings:
##       Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## Exc   -0.329 -0.153  0.590 -0.299         0.536
## Inh   -0.119 -0.145  0.168 -0.401 -0.154 -0.781
## Astro -0.294 -0.284         0.609  0.541 -0.152
## Endo         -0.162 -0.200  0.384 -0.789  0.136
## Micro        -0.179 -0.705 -0.463  0.208  0.247
## Oligo -0.175  0.903                            
## OPC    0.866         0.280  0.119  0.119       
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.143  0.143  0.143  0.143  0.143  0.143
## Cumulative Var  0.143  0.286  0.429  0.571  0.714  0.857
loadings.ma <- loadings(ctp.pcx)
class(loadings.ma) <- "matrix"
loadings.long <- melt(data.frame(celltype = rownames(loadings.ma), loadings.ma), variable.name = "CTP_PC", value.name = "Loading")
## Using celltype as id variables
analysis_order <- c(level2_cols)
loadings.long$MatchAnalysis <- match(loadings.long$celltype, analysis_order)
loadings.long %>% mutate(celltype = fct_reorder(celltype, MatchAnalysis)) %>% 
  ggplot(aes(x = CTP_PC, y = Loading, fill = celltype)) + 
  geom_bar(stat="identity", position=position_dodge())

ggsave(paste(out_dir, "/ctp_noclr_pc_ROSMAP_loadings.svg", sep = ""))
## Saving 7 x 5 in image
ggsave(paste(out_dir, "/ctp_noclr_pc_ROSMAP_loadings.png", sep = ""))
## Saving 7 x 5 in image
# Take the PCs explaining 90-95% of variance
# - Check the % of variance explained by the PCs
# - from here, take the number of PCs that explain 
foo <- ((ctp.pcx$sdev)^2)/((sum((ctp.pcx$sdev)^2)))*100
var_exp <- data.frame(Var_pc = foo, Cumulative_Var_pc = cumsum(foo))
print(var_exp)
##           Var_pc Cumulative_Var_pc
## Comp.1 67.471811          67.47181
## Comp.2 13.912510          81.38432
## Comp.3 10.718660          92.10298
## Comp.4  3.584419          95.68740
## Comp.5  2.462324          98.14973
## Comp.6  1.850275         100.00000
# Choose the number of PCs to include as covariates
# - eg. in my dataset it's the first 3 PCs
pcs_keep <- rownames(var_exp)[1:which(var_exp$Cumulative_Var_pc >= 95)[1]]

# Join into pheno data frame
ctp_pc <- inner_join(ctp, data.frame(IID = rownames(ctp.pcx$scores), ctp.pcx$scores[,pcs_keep]), by = "IID", type = "inner")
ctp_pc <- ctp_pc %>% mutate(MatchDx = match(dx, c("CTL", "AZD"))) %>% mutate(dx = fct_reorder(dx, MatchDx)) %>% dplyr::select(-c("MatchDx"))

# Write table
saveRDS(ctp_pc, paste(out_dir, "/ctp_noclr_pc95_", studyn, ".rds", sep = ""))

# Run model
mod0 <- lrm(dx ~ age + sex + batch, x = TRUE, y = TRUE, data = ctp_pc %>% filter(study == all_of(studyn)))
mod1 <- lrm(dx ~ age + sex + batch + Comp.1 + Comp.2 + Comp.3 + Comp.4, x = TRUE, y = TRUE, data = ctp_pc %>% filter(study == all_of(studyn)))

mod0
## Frequencies of Missing Values Due to Each Variable
##    dx   age   sex batch 
##     0     1     1     0 
## 
## Logistic Regression Model
##  
##  lrm(formula = dx ~ age + sex + batch, data = ctp_pc %>% filter(study == 
##      all_of(studyn)), x = TRUE, y = TRUE)
##  
##  
##                         Model Likelihood    Discrimination    Rank Discrim.    
##                               Ratio Test           Indexes          Indexes    
##  Obs           718    LR chi2      62.29    R2       0.112    C       0.664    
##   CTL          418    d.f.             3    g        0.720    Dxy     0.328    
##   AZD          300    Pr(> chi2) <0.0001    gr       2.055    gamma   0.346    
##  max |deriv| 9e-06                          gp       0.153    tau-a   0.160    
##                                             Brier    0.223                     
##  
##                Coef     S.E.   Wald Z Pr(>|Z|)
##  Intercept     -12.8460 1.8394 -6.98  <0.0001 
##  age             0.1423 0.0209  6.81  <0.0001 
##  sex=M           0.0060 0.1681  0.04  0.9717  
##  batch=ROSMAP1   0.2693 0.1643  1.64  0.1011  
## 
mod1
## Frequencies of Missing Values Due to Each Variable
##     dx    age    sex  batch Comp.1 Comp.2 Comp.3 Comp.4 
##      0      1      1      0      0      0      0      0 
## 
## Logistic Regression Model
##  
##  lrm(formula = dx ~ age + sex + batch + Comp.1 + Comp.2 + Comp.3 + 
##      Comp.4, data = ctp_pc %>% filter(study == all_of(studyn)), 
##      x = TRUE, y = TRUE)
##  
##  
##                         Model Likelihood    Discrimination    Rank Discrim.    
##                               Ratio Test           Indexes          Indexes    
##  Obs           718    LR chi2      90.00    R2       0.159    C       0.704    
##   CTL          418    d.f.             7    g        0.911    Dxy     0.408    
##   AZD          300    Pr(> chi2) <0.0001    gr       2.487    gamma   0.408    
##  max |deriv| 2e-05                          gp       0.192    tau-a   0.199    
##                                             Brier    0.214                     
##  
##                Coef     S.E.   Wald Z Pr(>|Z|)
##  Intercept     -12.2339 1.8740 -6.53  <0.0001 
##  age             0.1356 0.0212  6.39  <0.0001 
##  sex=M           0.0367 0.1727  0.21  0.8317  
##  batch=ROSMAP1   0.1828 0.1994  0.92  0.3594  
##  Comp.1         -0.0772 0.2021 -0.38  0.7024  
##  Comp.2         -0.0442 0.3713 -0.12  0.9053  
##  Comp.3          1.3743 0.4456  3.08  0.0020  
##  Comp.4         -3.0507 0.7471 -4.08  <0.0001 
## 
lrtest(mod0, mod1)
## Likelihood ratio test
## 
## Model 1: dx ~ age + sex + batch
## Model 2: dx ~ age + sex + batch + Comp.1 + Comp.2 + Comp.3 + Comp.4
##   #Df  LogLik Df  Chisq Pr(>Chisq)    
## 1   4 -456.80                         
## 2   8 -442.94  4 27.717  1.423e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modx <- lrm(dx ~ Comp.1 + Comp.2 + Comp.3 + Comp.4, x = TRUE, y = TRUE, data = ctp_pc %>% filter(study == all_of(studyn)))
modx
## Logistic Regression Model
##  
##  lrm(formula = dx ~ Comp.1 + Comp.2 + Comp.3 + Comp.4, data = ctp_pc %>% 
##      filter(study == all_of(studyn)), x = TRUE, y = TRUE)
##  
##                         Model Likelihood    Discrimination    Rank Discrim.    
##                               Ratio Test           Indexes          Indexes    
##  Obs           719    LR chi2      38.73    R2       0.071    C       0.631    
##   CTL          419    d.f.             4    g        0.542    Dxy     0.262    
##   AZD          300    Pr(> chi2) <0.0001    gr       1.719    gamma   0.263    
##  max |deriv| 5e-15                          gp       0.127    tau-a   0.128    
##                                             Brier    0.230                     
##  
##            Coef    S.E.   Wald Z Pr(>|Z|)
##  Intercept -0.3483 0.0778 -4.48  <0.0001 
##  Comp.1     0.0626 0.1672  0.37  0.7082  
##  Comp.2    -0.0529 0.3605 -0.15  0.8833  
##  Comp.3     1.8008 0.4221  4.27  <0.0001 
##  Comp.4    -3.1047 0.7203 -4.31  <0.0001 
## 
saveRDS(mod0, paste(out_dir, "/ROSMAP_ctp_mod0.rds", sep = ""))
saveRDS(mod1, paste(out_dir, "/ROSMAP_ctp_mod1.rds", sep = ""))

7 Effect sizes of each cell-type of diagnosis

7.1 No adjustment for oligodendrocytes

7.1.1 ASDbrain

7.1.1.1 No covariates

  • important for this study due to confounding of brain bank + age
studyn <- "ASDbrain"

tmp <- ctp %>% filter(study == all_of(studyn)) %>% filter(!IID %in% asd_exclude$IID)

# - run OLS --> clustered SE to account for correlation
mod2.Exc <- ols(Exc ~ dx, x = TRUE, y = TRUE, data = tmp)
mod2.Inh <- ols(Inh ~ dx, x = TRUE, y = TRUE, data = tmp)
mod2.Astro <- ols(NonN_Astro_FGF3R ~ dx, x = TRUE, y = TRUE, data = tmp)
mod2.MicroEndo <- ols(NonN_Micro.Endo_TYROBP ~ dx, x = TRUE, y = TRUE, data = tmp)
mod2.Oligo <- ols(NonN_Oligo_MBP ~ dx, x = TRUE, y = TRUE, data = tmp)

mod2.bhat <- as.matrix(data.frame(
  Exc = mod2.Exc$coef, 
  Inh = mod2.Inh$coef,
  Astro = mod2.Astro$coef,
  MicroEndo = mod2.MicroEndo$coef,
  Oligo = mod2.Oligo$coef))

mod2.se <- as.matrix(data.frame(
  Exc = tidy(summary.lm(mod2.Exc))$std.error, 
  Inh = tidy(summary.lm(mod2.Inh))$std.error, 
  Astro = tidy(summary.lm(mod2.Astro))$std.error, 
  MicroEndo = tidy(summary.lm(mod2.MicroEndo))$std.error, 
  Oligo = tidy(summary.lm(mod2.Oligo))$std.error))

# Apply mashr, using hthe canonical method
mash_data <- mash_set_data(mod2.bhat, mod2.se)
# canonical method
U.c <- cov_canonical(mash_data)
m.c <- mash(mash_data, U.c)
print(get_loglik(m.c),digits = 10)
m.c.pm <- get_pm(m.c)
m.c.sd <- get_psd(m.c)
get_lfsr(m.c)

# Forest plot for effect sizes
mash_plot <- data.frame(celltype = c("Exc", "Inh", "Astro", "MicroEndo", "Oligo"), Post_mean = m.c.pm[grep("dx", rownames(m.c.pm)),], Post_SD = m.c.sd[grep("dx", rownames(m.c.pm)),])
mash_plot$ci_l <- mash_plot$Post_mean - mash_plot$Post_SD
mash_plot$ci_u <- mash_plot$Post_mean + mash_plot$Post_SD
analysis_order <- c("Exc", "Inh", "Astro", "MicroEndo", "Oligo")
mash_plot$MatchAnalysis <- match(mash_plot$celltype, analysis_order)

mash_plot %>% mutate(celltype = fct_reorder(celltype, MatchAnalysis)) %>% ggplot(aes(y=celltype, x=Post_mean, xmin=ci_l, xmax=ci_u, colour = celltype))+ 
  geom_point() + 
  geom_errorbarh(height=.1) +
  ggtitle(paste("mashr posterior mean per cell-type, for diagnosis in ", studyn, sep = "")) +
  theme(legend.position = "none")
ggsave(paste(out_dir, "/", studyn, "_CTPeffect_mashr_nocov.png", sep = ""))

7.1.1.2 Covariates

studyn <- "ASDbrain"

tmp <- ctp %>% filter(study == all_of(studyn)) %>% filter(!IID %in% asd_exclude$IID)

# - run OLS --> clustered SE to account for correlation
mod2.Exc <- ols(Exc ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.Inh <- ols(Inh ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.Astro <- ols(NonN_Astro_FGF3R ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.MicroEndo <- ols(NonN_Micro.Endo_TYROBP ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.Oligo <- ols(NonN_Oligo_MBP ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)

mod2.Exc.s <- robcov(mod2.Exc, cluster = tmp$IID)
mod2.Inh.s <- robcov(mod2.Inh, cluster = tmp$IID)
mod2.Astro.s <- robcov(mod2.Astro, cluster = tmp$IID)
mod2.MicroEndo.s <- robcov(mod2.MicroEndo, cluster = tmp$IID)
mod2.Oligo.s <- robcov(mod2.Oligo, cluster = tmp$IID)

mod2.bhat <- as.matrix(data.frame(
  Exc = mod2.Exc.s$coef, 
  Inh = mod2.Inh.s$coef,
  Astro = mod2.Astro.s$coef,
  MicroEndo = mod2.MicroEndo.s$coef,
  Oligo = mod2.Oligo.s$coef))

mod2.se <- as.matrix(data.frame(
  Exc = tidy(summary.lm(mod2.Exc))$std.error, 
  Inh = tidy(summary.lm(mod2.Inh))$std.error, 
  Astro = tidy(summary.lm(mod2.Astro))$std.error, 
  MicroEndo = tidy(summary.lm(mod2.MicroEndo))$std.error, 
  Oligo = tidy(summary.lm(mod2.Oligo))$std.error))

# Apply mashr, using hthe canonical method
mash_data <- mash_set_data(mod2.bhat, mod2.se)
# canonical method
U.c <- cov_canonical(mash_data)
m.c <- mash(mash_data, U.c)
print(get_loglik(m.c),digits = 10)
m.c.pm <- get_pm(m.c)
m.c.sd <- get_psd(m.c)
get_lfsr(m.c)

# Forest plot for effect sizes
mash_plot <- data.frame(celltype = c("Exc", "Inh", "Astro", "MicroEndo", "Oligo"), Post_mean = m.c.pm[grep("dx", rownames(m.c.pm)),], Post_SD = m.c.sd[grep("dx", rownames(m.c.pm)),])
mash_plot$ci_l <- mash_plot$Post_mean - mash_plot$Post_SD
mash_plot$ci_u <- mash_plot$Post_mean + mash_plot$Post_SD
analysis_order <- c("Exc", "Inh", "Astro", "MicroEndo", "Oligo")
mash_plot$MatchAnalysis <- match(mash_plot$celltype, analysis_order)

mash_plot %>% mutate(celltype = fct_reorder(celltype, MatchAnalysis)) %>% ggplot(aes(y=celltype, x=Post_mean, xmin=ci_l, xmax=ci_u, colour = celltype))+ 
  geom_point() + 
  geom_errorbarh(height=.1) +
  ggtitle(paste("mashr posterior mean per cell-type, for diagnosis in ", studyn, sep = "")) +
  theme(legend.position = "none")
ggsave(paste(out_dir, "/", studyn, "_CTPeffect_mashr_cov.png", sep = ""))

7.1.2 LIBD

studyn <- "LIBD"

tmp <- ctp %>% filter(study == all_of(studyn))

# - run OLS --> clustered SE to account for correlation
mod2.Exc <- ols(Exc ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.Inh <- ols(Inh ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.Astro <- ols(NonN_Astro_FGF3R ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.MicroEndo <- ols(NonN_Micro.Endo_TYROBP ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.Oligo <- ols(NonN_Oligo_MBP ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)

mod2.bhat <- as.matrix(data.frame(
  Exc = mod2.Exc$coef, 
  Inh = mod2.Inh$coef,
  Astro = mod2.Astro$coef,
  MicroEndo = mod2.MicroEndo$coef,
  Oligo = mod2.Oligo$coef))

mod2.se <- as.matrix(data.frame(
  Exc = tidy(summary.lm(mod2.Exc))$std.error, 
  Inh = tidy(summary.lm(mod2.Inh))$std.error, 
  Astro = tidy(summary.lm(mod2.Astro))$std.error, 
  MicroEndo = tidy(summary.lm(mod2.MicroEndo))$std.error, 
  Oligo = tidy(summary.lm(mod2.Oligo))$std.error))

# Apply mashr, using hthe canonical method
mash_data <- mash_set_data(mod2.bhat, mod2.se)
# canonical method
U.c <- cov_canonical(mash_data)
m.c <- mash(mash_data, U.c)
print(get_loglik(m.c),digits = 10)
m.c.pm <- get_pm(m.c)
m.c.sd <- get_psd(m.c)
get_lfsr(m.c)

# Forest plot for effect sizes
mash_plot <- data.frame(celltype = c("Exc", "Inh", "Astro", "MicroEndo", "Oligo"), Post_mean = m.c.pm[grep("dx", rownames(m.c.pm)),], Post_SD = m.c.sd[grep("dx", rownames(m.c.pm)),])
mash_plot$ci_l <- mash_plot$Post_mean - mash_plot$Post_SD
mash_plot$ci_u <- mash_plot$Post_mean + mash_plot$Post_SD
analysis_order <- c("Exc", "Inh", "Astro", "MicroEndo", "Oligo")
mash_plot$MatchAnalysis <- match(mash_plot$celltype, analysis_order)
mash_plot %>% mutate(celltype = fct_reorder(celltype, MatchAnalysis)) %>% ggplot(aes(y=celltype, x=Post_mean, xmin=ci_l, xmax=ci_u, colour = celltype))+ 
  geom_point() + 
  geom_errorbarh(height=.1) +
  ggtitle(paste("mashr posterior mean per cell-type, for diagnosis in ", studyn, sep = "")) +
  theme(legend.position = "none")
ggsave(paste(out_dir, "/", studyn, "_CTPeffect_mashr_cov.png", sep = ""))

7.1.3 ROSMAP

studyn <- "ROSMAP"

tmp <- ctp %>% filter(study == all_of(studyn))

# - run OLS --> clustered SE to account for correlation
mod2.Exc <- ols(Exc ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.Inh <- ols(Inh ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.Astro <- ols(NonN_Astro_FGF3R ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.MicroEndo <- ols(NonN_Micro.Endo_TYROBP ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.Oligo <- ols(NonN_Oligo_MBP ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)

mod2.bhat <- as.matrix(data.frame(
  Exc = mod2.Exc$coef, 
  Inh = mod2.Inh$coef,
  Astro = mod2.Astro$coef,
  MicroEndo = mod2.MicroEndo$coef,
  Oligo = mod2.Oligo$coef))

mod2.se <- as.matrix(data.frame(
  Exc = tidy(summary.lm(mod2.Exc))$std.error, 
  Inh = tidy(summary.lm(mod2.Inh))$std.error, 
  Astro = tidy(summary.lm(mod2.Astro))$std.error, 
  MicroEndo = tidy(summary.lm(mod2.MicroEndo))$std.error, 
  Oligo = tidy(summary.lm(mod2.Oligo))$std.error))

# Apply mashr, using hthe canonical method
mash_data <- mash_set_data(mod2.bhat, mod2.se)
# canonical method
U.c <- cov_canonical(mash_data)
m.c <- mash(mash_data, U.c)
print(get_loglik(m.c),digits = 10)
m.c.pm <- get_pm(m.c)
m.c.sd <- get_psd(m.c)
get_lfsr(m.c)

# Forest plot for effect sizes
mash_plot <- data.frame(celltype = c("Exc", "Inh", "Astro", "MicroEndo", "Oligo"), Post_mean = m.c.pm[grep("dx", rownames(m.c.pm)),], Post_SD = m.c.sd[grep("dx", rownames(m.c.pm)),])
mash_plot$ci_l <- mash_plot$Post_mean - mash_plot$Post_SD
mash_plot$ci_u <- mash_plot$Post_mean + mash_plot$Post_SD
analysis_order <- c("Exc", "Inh", "Astro", "MicroEndo", "Oligo")
mash_plot$MatchAnalysis <- match(mash_plot$celltype, analysis_order)
mash_plot %>% mutate(celltype = fct_reorder(celltype, MatchAnalysis)) %>% ggplot(aes(y=celltype, x=Post_mean, xmin=ci_l, xmax=ci_u, colour = celltype))+ 
  geom_point() + 
  geom_errorbarh(height=.1) +
  ggtitle(paste("mashr posterior mean per cell-type, for diagnosis in ", studyn, sep = "")) +
  theme(legend.position = "none")
ggsave(paste(out_dir, "/", studyn, "_CTPeffect_mashr_cov.png", sep = ""))

7.2 Adjustment for oligodendrocytes

7.2.1 ASDbrain

7.2.1.1 No covariates

  • important for this study due to confounding of brain bank + age
studyn <- "ASDbrain"

tmp <- ctp_adjoligo %>% filter(study == all_of(studyn)) %>% filter(!IID %in% asd_exclude$IID)

# - run OLS --> clustered SE to account for correlation
mod2.Exc <- ols(Exc ~ dx, x = TRUE, y = TRUE, data = tmp)
mod2.Inh <- ols(Inh ~ dx, x = TRUE, y = TRUE, data = tmp)
mod2.Astro <- ols(NonN_Astro_FGF3R ~ dx, x = TRUE, y = TRUE, data = tmp)
mod2.MicroEndo <- ols(NonN_Micro.Endo_TYROBP ~ dx, x = TRUE, y = TRUE, data = tmp)

mod2.bhat <- as.matrix(data.frame(
  Exc = mod2.Exc$coef, 
  Inh = mod2.Inh$coef,
  Astro = mod2.Astro$coef,
  MicroEndo = mod2.MicroEndo$coef))

mod2.se <- as.matrix(data.frame(
  Exc = tidy(summary.lm(mod2.Exc))$std.error, 
  Inh = tidy(summary.lm(mod2.Inh))$std.error, 
  Astro = tidy(summary.lm(mod2.Astro))$std.error, 
  MicroEndo = tidy(summary.lm(mod2.MicroEndo))$std.error))

# Apply mashr, using hthe canonical method
mash_data <- mash_set_data(mod2.bhat, mod2.se)
# canonical method
U.c <- cov_canonical(mash_data)
m.c <- mash(mash_data, U.c)
print(get_loglik(m.c),digits = 10)
m.c.pm <- get_pm(m.c)
m.c.sd <- get_psd(m.c)
get_lfsr(m.c)

# Forest plot for effect sizes
mash_plot <- data.frame(celltype = c("Exc", "Inh", "Astro", "MicroEndo"), Post_mean = m.c.pm[grep("dx", rownames(m.c.pm)),], Post_SD = m.c.sd[grep("dx", rownames(m.c.pm)),])
mash_plot$ci_l <- mash_plot$Post_mean - mash_plot$Post_SD
mash_plot$ci_u <- mash_plot$Post_mean + mash_plot$Post_SD
analysis_order <- c("Exc", "Inh", "Astro", "MicroEndo")
mash_plot$MatchAnalysis <- match(mash_plot$celltype, analysis_order)

mash_plot %>% mutate(celltype = fct_reorder(celltype, MatchAnalysis)) %>% ggplot(aes(y=celltype, x=Post_mean, xmin=ci_l, xmax=ci_u, colour = celltype))+ 
  geom_point() + 
  geom_errorbarh(height=.1) +
  ggtitle(paste("mashr posterior mean per cell-type, for diagnosis in ", studyn, sep = "")) +
  theme(legend.position = "none")
ggsave(paste(out_dir, "/", studyn, "_CTPeffect_mashr_nocov.png", sep = ""))

7.2.1.2 Covariates

studyn <- "ASDbrain"

tmp <- ctp_adjoligo %>% filter(study == all_of(studyn)) %>% filter(!IID %in% asd_exclude$IID)

# - run OLS --> clustered SE to account for correlation
mod2.Exc <- ols(Exc ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.Inh <- ols(Inh ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.Astro <- ols(NonN_Astro_FGF3R ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.MicroEndo <- ols(NonN_Micro.Endo_TYROBP ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)

mod2.Exc.s <- robcov(mod2.Exc, cluster = tmp$IID)
mod2.Inh.s <- robcov(mod2.Inh, cluster = tmp$IID)
mod2.Astro.s <- robcov(mod2.Astro, cluster = tmp$IID)
mod2.MicroEndo.s <- robcov(mod2.MicroEndo, cluster = tmp$IID)

mod2.bhat <- as.matrix(data.frame(
  Exc = mod2.Exc.s$coef, 
  Inh = mod2.Inh.s$coef,
  Astro = mod2.Astro.s$coef,
  MicroEndo = mod2.MicroEndo.s$coef))

mod2.se <- as.matrix(data.frame(
  Exc = tidy(summary.lm(mod2.Exc))$std.error, 
  Inh = tidy(summary.lm(mod2.Inh))$std.error, 
  Astro = tidy(summary.lm(mod2.Astro))$std.error, 
  MicroEndo = tidy(summary.lm(mod2.MicroEndo))$std.error))

# Apply mashr, using hthe canonical method
mash_data <- mash_set_data(mod2.bhat, mod2.se)
# canonical method
U.c <- cov_canonical(mash_data)
m.c <- mash(mash_data, U.c)
print(get_loglik(m.c),digits = 10)
m.c.pm <- get_pm(m.c)
m.c.sd <- get_psd(m.c)
get_lfsr(m.c)

# Forest plot for effect sizes
mash_plot <- data.frame(celltype = c("Exc", "Inh", "Astro", "MicroEndo"), Post_mean = m.c.pm[grep("dx", rownames(m.c.pm)),], Post_SD = m.c.sd[grep("dx", rownames(m.c.pm)),])
mash_plot$ci_l <- mash_plot$Post_mean - mash_plot$Post_SD
mash_plot$ci_u <- mash_plot$Post_mean + mash_plot$Post_SD
analysis_order <- c("Exc", "Inh", "Astro", "MicroEndo")
mash_plot$MatchAnalysis <- match(mash_plot$celltype, analysis_order)

mash_plot %>% mutate(celltype = fct_reorder(celltype, MatchAnalysis)) %>% ggplot(aes(y=celltype, x=Post_mean, xmin=ci_l, xmax=ci_u, colour = celltype))+ 
  geom_point() + 
  geom_errorbarh(height=.1) +
  ggtitle(paste("mashr posterior mean per cell-type, for diagnosis in ", studyn, sep = "")) +
  theme(legend.position = "none")
ggsave(paste(out_dir, "/", studyn, "_CTPeffect_mashr_cov.png", sep = ""))

7.2.2 LIBD

studyn <- "LIBD"

tmp <- ctp_adjoligo %>% filter(study == all_of(studyn))

# - run OLS --> clustered SE to account for correlation
mod2.Exc <- ols(Exc ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.Inh <- ols(Inh ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.Astro <- ols(NonN_Astro_FGF3R ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.MicroEndo <- ols(NonN_Micro.Endo_TYROBP ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)

mod2.bhat <- as.matrix(data.frame(
  Exc = mod2.Exc$coef, 
  Inh = mod2.Inh$coef,
  Astro = mod2.Astro$coef,
  MicroEndo = mod2.MicroEndo$coef))

mod2.se <- as.matrix(data.frame(
  Exc = tidy(summary.lm(mod2.Exc))$std.error, 
  Inh = tidy(summary.lm(mod2.Inh))$std.error, 
  Astro = tidy(summary.lm(mod2.Astro))$std.error, 
  MicroEndo = tidy(summary.lm(mod2.MicroEndo))$std.error))

# Apply mashr, using hthe canonical method
mash_data <- mash_set_data(mod2.bhat, mod2.se)
# canonical method
U.c <- cov_canonical(mash_data)
m.c <- mash(mash_data, U.c)
print(get_loglik(m.c),digits = 10)
m.c.pm <- get_pm(m.c)
m.c.sd <- get_psd(m.c)
get_lfsr(m.c)

# Forest plot for effect sizes
mash_plot <- data.frame(celltype = c("Exc", "Inh", "Astro", "MicroEndo"), Post_mean = m.c.pm[grep("dx", rownames(m.c.pm)),], Post_SD = m.c.sd[grep("dx", rownames(m.c.pm)),])
mash_plot$ci_l <- mash_plot$Post_mean - mash_plot$Post_SD
mash_plot$ci_u <- mash_plot$Post_mean + mash_plot$Post_SD
analysis_order <- c("Exc", "Inh", "Astro", "MicroEndo")
mash_plot$MatchAnalysis <- match(mash_plot$celltype, analysis_order)
mash_plot %>% mutate(celltype = fct_reorder(celltype, MatchAnalysis)) %>% ggplot(aes(y=celltype, x=Post_mean, xmin=ci_l, xmax=ci_u, colour = celltype))+ 
  geom_point() + 
  geom_errorbarh(height=.1) +
  ggtitle(paste("mashr posterior mean per cell-type, for diagnosis in ", studyn, sep = "")) +
  theme(legend.position = "none")
ggsave(paste(out_dir, "/", studyn, "_CTPeffect_mashr_cov.png", sep = ""))

7.2.3 ROSMAP

studyn <- "ROSMAP"

tmp <- ctp_adjoligo %>% filter(study == all_of(studyn))

# - run OLS --> clustered SE to account for correlation
mod2.Exc <- ols(Exc ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.Inh <- ols(Inh ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.Astro <- ols(NonN_Astro_FGF3R ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)
mod2.MicroEndo <- ols(NonN_Micro.Endo_TYROBP ~ age + sex + batch + dx, x = TRUE, y = TRUE, data = tmp)

mod2.bhat <- as.matrix(data.frame(
  Exc = mod2.Exc$coef, 
  Inh = mod2.Inh$coef,
  Astro = mod2.Astro$coef,
  MicroEndo = mod2.MicroEndo$coef))

mod2.se <- as.matrix(data.frame(
  Exc = tidy(summary.lm(mod2.Exc))$std.error, 
  Inh = tidy(summary.lm(mod2.Inh))$std.error, 
  Astro = tidy(summary.lm(mod2.Astro))$std.error, 
  MicroEndo = tidy(summary.lm(mod2.MicroEndo))$std.error))

# Apply mashr, using hthe canonical method
mash_data <- mash_set_data(mod2.bhat, mod2.se)
# canonical method
U.c <- cov_canonical(mash_data)
m.c <- mash(mash_data, U.c)
print(get_loglik(m.c),digits = 10)
m.c.pm <- get_pm(m.c)
m.c.sd <- get_psd(m.c)
get_lfsr(m.c)

# Forest plot for effect sizes
mash_plot <- data.frame(celltype = c("Exc", "Inh", "Astro", "MicroEndo"), Post_mean = m.c.pm[grep("dx", rownames(m.c.pm)),], Post_SD = m.c.sd[grep("dx", rownames(m.c.pm)),])
mash_plot$ci_l <- mash_plot$Post_mean - mash_plot$Post_SD
mash_plot$ci_u <- mash_plot$Post_mean + mash_plot$Post_SD
analysis_order <- c("Exc", "Inh", "Astro", "MicroEndo")
mash_plot$MatchAnalysis <- match(mash_plot$celltype, analysis_order)
mash_plot %>% mutate(celltype = fct_reorder(celltype, MatchAnalysis)) %>% ggplot(aes(y=celltype, x=Post_mean, xmin=ci_l, xmax=ci_u, colour = celltype))+ 
  geom_point() + 
  geom_errorbarh(height=.1) +
  ggtitle(paste("mashr posterior mean per cell-type, for diagnosis in ", studyn, sep = "")) +
  theme(legend.position = "none")
ggsave(paste(out_dir, "/", studyn, "_CTPeffect_mashr_cov.png", sep = ""))

8 Linear models for individual cell-type differences

8.1 Sub-types

8.1.1 clr, no adjustment for oligodendrocytes

8.1.1.1 ASDbrain

summary(lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ dx, data = ctp.clr.1e3 %>% filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID)))
## Response Exc :
## 
## Call:
## lm(formula = Exc ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.79517 -0.10083  0.05935  0.14285  0.32690 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.31174    0.04397   7.090 2.46e-09 ***
## dxCTL        0.11939    0.06444   1.853   0.0692 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2448 on 56 degrees of freedom
## Multiple R-squared:  0.05775,    Adjusted R-squared:  0.04092 
## F-statistic: 3.432 on 1 and 56 DF,  p-value: 0.06921
## 
## 
## Response Inh :
## 
## Call:
## lm(formula = Inh ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.230859 -0.094632 -0.002926  0.105418  0.223843 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.32569    0.02151  15.139   <2e-16 ***
## dxCTL       -0.01236    0.03153  -0.392    0.697    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1198 on 56 degrees of freedom
## Multiple R-squared:  0.002735,   Adjusted R-squared:  -0.01507 
## F-statistic: 0.1536 on 1 and 56 DF,  p-value: 0.6966
## 
## 
## Response Astro :
## 
## Call:
## lm(formula = Astro ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.49194 -0.09152  0.02459  0.13428  0.29991 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.227494   0.030523   7.453 6.17e-10 ***
## dxCTL       0.008671   0.044736   0.194    0.847    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1699 on 56 degrees of freedom
## Multiple R-squared:  0.0006704,  Adjusted R-squared:  -0.01717 
## F-statistic: 0.03757 on 1 and 56 DF,  p-value: 0.847
## 
## 
## Response Endo :
## 
## Call:
## lm(formula = Endo ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.12401 -0.03931 -0.00047  0.03740  0.21080 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.54316    0.01215 -44.700   <2e-16 ***
## dxCTL       -0.02108    0.01781  -1.184    0.242    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06766 on 56 degrees of freedom
## Multiple R-squared:  0.0244, Adjusted R-squared:  0.006983 
## F-statistic: 1.401 on 1 and 56 DF,  p-value: 0.2416
## 
## 
## Response Micro :
## 
## Call:
## lm(formula = Micro ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.24394 -0.07252 -0.02098  0.06577  0.25309 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.47378    0.01840 -25.747  < 2e-16 ***
## dxCTL       -0.08367    0.02697  -3.102  0.00301 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1025 on 56 degrees of freedom
## Multiple R-squared:  0.1467, Adjusted R-squared:  0.1314 
## F-statistic: 9.624 on 1 and 56 DF,  p-value: 0.003007
## 
## 
## Response Oligo :
## 
## Call:
## lm(formula = Oligo ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.59523 -0.22961 -0.08931  0.21070  0.78627 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.91840    0.06165  14.897   <2e-16 ***
## dxCTL        0.02520    0.09035   0.279    0.781    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3432 on 56 degrees of freedom
## Multiple R-squared:  0.001387,   Adjusted R-squared:  -0.01644 
## F-statistic: 0.0778 on 1 and 56 DF,  p-value: 0.7813
## 
## 
## Response OPC :
## 
## Call:
## lm(formula = OPC ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.16502 -0.05990 -0.02423  0.04531  0.29068 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.76639    0.01748 -43.849   <2e-16 ***
## dxCTL       -0.03616    0.02562  -1.412    0.164    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09731 on 56 degrees of freedom
## Multiple R-squared:  0.03436,    Adjusted R-squared:  0.01711 
## F-statistic: 1.992 on 1 and 56 DF,  p-value: 0.1636
summary(lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID)))
## Response Exc :
## 
## Call:
## lm(formula = Exc ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.75304 -0.07787  0.05203  0.15367  0.36890 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)     0.1006606  0.2461906   0.409    0.684
## dxCTL           0.1073554  0.0701213   1.531    0.132
## age             0.0187009  0.0124363   1.504    0.139
## age2           -0.0002351  0.0001525  -1.542    0.129
## sexM           -0.1110047  0.0775934  -1.431    0.159
## batchASDbrain2 -0.0866973  0.1047283  -0.828    0.412
## 
## Residual standard error: 0.2407 on 52 degrees of freedom
## Multiple R-squared:  0.1545, Adjusted R-squared:  0.07319 
## F-statistic:   1.9 on 5 and 52 DF,  p-value: 0.1102
## 
## 
## Response Inh :
## 
## Call:
## lm(formula = Inh ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.23036 -0.09011 -0.01411  0.09828  0.25067 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)     3.352e-01  1.244e-01   2.694  0.00947 **
## dxCTL          -1.260e-02  3.543e-02  -0.356  0.72357   
## age            -1.054e-03  6.284e-03  -0.168  0.86746   
## age2            3.147e-06  7.708e-05   0.041  0.96759   
## sexM            3.917e-02  3.921e-02   0.999  0.32242   
## batchASDbrain2 -3.709e-02  5.292e-02  -0.701  0.48645   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1216 on 52 degrees of freedom
## Multiple R-squared:  0.04569,    Adjusted R-squared:  -0.04607 
## F-statistic: 0.4979 on 5 and 52 DF,  p-value: 0.7763
## 
## 
## Response Astro :
## 
## Call:
## lm(formula = Astro ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.49340 -0.09444  0.04083  0.12648  0.25435 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)     2.039e-01  1.784e-01   1.142    0.258
## dxCTL           2.129e-03  5.083e-02   0.042    0.967
## age             2.438e-03  9.014e-03   0.270    0.788
## age2           -1.959e-05  1.106e-04  -0.177    0.860
## sexM           -4.415e-02  5.624e-02  -0.785    0.436
## batchASDbrain2  6.332e-03  7.591e-02   0.083    0.934
## 
## Residual standard error: 0.1744 on 52 degrees of freedom
## Multiple R-squared:  0.0224, Adjusted R-squared:  -0.0716 
## F-statistic: 0.2383 on 5 and 52 DF,  p-value: 0.9437
## 
## 
## Response Endo :
## 
## Call:
## lm(formula = Endo ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.110949 -0.046803  0.000079  0.026488  0.230405 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -4.391e-01  6.898e-02  -6.365 5.08e-08 ***
## dxCTL          -1.515e-02  1.965e-02  -0.771    0.444    
## age            -5.264e-03  3.485e-03  -1.510    0.137    
## age2            5.386e-05  4.274e-05   1.260    0.213    
## sexM            2.177e-03  2.174e-02   0.100    0.921    
## batchASDbrain2  9.592e-03  2.934e-02   0.327    0.745    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06743 on 52 degrees of freedom
## Multiple R-squared:  0.1001, Adjusted R-squared:  0.01359 
## F-statistic: 1.157 on 5 and 52 DF,  p-value: 0.3428
## 
## 
## Response Micro :
## 
## Call:
## lm(formula = Micro ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.242322 -0.058188 -0.002788  0.055054  0.267380 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    -2.473e-01  9.416e-02  -2.626   0.0113 *
## dxCTL          -6.358e-02  2.682e-02  -2.371   0.0215 *
## age            -1.167e-02  4.756e-03  -2.454   0.0175 *
## age2            1.115e-04  5.834e-05   1.911   0.0616 .
## sexM            2.363e-02  2.968e-02   0.796   0.4295  
## batchASDbrain2  2.418e-02  4.005e-02   0.604   0.5486  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09204 on 52 degrees of freedom
## Multiple R-squared:  0.3605, Adjusted R-squared:  0.2991 
## F-statistic: 5.864 on 5 and 52 DF,  p-value: 0.000228
## 
## 
## Response Oligo :
## 
## Call:
## lm(formula = Oligo ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.54230 -0.24729 -0.08104  0.17814  0.81947 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    7.833e-01  3.583e-01   2.186   0.0333 *
## dxCTL          1.866e-02  1.021e-01   0.183   0.8556  
## age            1.269e-04  1.810e-02   0.007   0.9944  
## age2           3.754e-05  2.220e-04   0.169   0.8664  
## sexM           7.835e-02  1.129e-01   0.694   0.4909  
## batchASDbrain2 5.747e-02  1.524e-01   0.377   0.7077  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3502 on 52 degrees of freedom
## Multiple R-squared:  0.0345, Adjusted R-squared:  -0.05834 
## F-statistic: 0.3716 on 5 and 52 DF,  p-value: 0.8658
## 
## 
## Response OPC :
## 
## Call:
## lm(formula = OPC ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.20572 -0.05836 -0.01291  0.05016  0.28057 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -7.366e-01  1.016e-01  -7.250 1.98e-09 ***
## dxCTL          -3.681e-02  2.894e-02  -1.272    0.209    
## age            -3.277e-03  5.133e-03  -0.638    0.526    
## age2            4.871e-05  6.296e-05   0.774    0.443    
## sexM            1.184e-02  3.202e-02   0.370    0.713    
## batchASDbrain2  2.622e-02  4.322e-02   0.607    0.547    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09932 on 52 degrees of freedom
## Multiple R-squared:  0.06595,    Adjusted R-squared:  -0.02386 
## F-statistic: 0.7343 on 5 and 52 DF,  p-value: 0.601

8.1.1.2 LIBD

summary(lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ dx, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID)))
## Response Exc :
## 
## Call:
## lm(formula = Exc ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.85572 -0.08622  0.05281  0.17875  0.63801 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.65077    0.02348  27.711   <2e-16 ***
## dxSCZ        0.02033    0.03462   0.587    0.557    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3459 on 400 degrees of freedom
## Multiple R-squared:  0.0008616,  Adjusted R-squared:  -0.001636 
## F-statistic: 0.345 on 1 and 400 DF,  p-value: 0.5573
## 
## 
## Response Inh :
## 
## Call:
## lm(formula = Inh ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.259420 -0.054351  0.003769  0.052136  0.252121 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.605642   0.006008 100.802   <2e-16 ***
## dxSCZ       -0.002959   0.008857  -0.334    0.738    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08851 on 400 degrees of freedom
## Multiple R-squared:  0.000279,   Adjusted R-squared:  -0.00222 
## F-statistic: 0.1116 on 1 and 400 DF,  p-value: 0.7385
## 
## 
## Response Astro :
## 
## Call:
## lm(formula = Astro ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.52208 -0.08499  0.00482  0.09322  0.51153 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.53231    0.01070  49.758   <2e-16 ***
## dxSCZ        0.01860    0.01577   1.179    0.239    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1576 on 400 degrees of freedom
## Multiple R-squared:  0.003465,   Adjusted R-squared:  0.0009738 
## F-statistic: 1.391 on 1 and 400 DF,  p-value: 0.239
## 
## 
## Response Endo :
## 
## Call:
## lm(formula = Endo ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.21324 -0.05900 -0.01539  0.04361  0.44679 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.718328   0.006215 -115.57   <2e-16 ***
## dxSCZ        0.004948   0.009162    0.54    0.589    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09156 on 400 degrees of freedom
## Multiple R-squared:  0.0007287,  Adjusted R-squared:  -0.00177 
## F-statistic: 0.2917 on 1 and 400 DF,  p-value: 0.5895
## 
## 
## Response Micro :
## 
## Call:
## lm(formula = Micro ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.47252 -0.09892 -0.00463  0.07610  0.60643 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.82969    0.01075 -77.195  < 2e-16 ***
## dxSCZ       -0.04873    0.01584  -3.075  0.00225 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1583 on 400 degrees of freedom
## Multiple R-squared:  0.0231, Adjusted R-squared:  0.02066 
## F-statistic: 9.459 on 1 and 400 DF,  p-value: 0.002246
## 
## 
## Response Oligo :
## 
## Call:
## lm(formula = Oligo ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.83510 -0.15758 -0.00938  0.15574  0.91863 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.57915    0.01766  89.399  < 2e-16 ***
## dxSCZ       -0.07306    0.02604  -2.806  0.00526 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2602 on 400 degrees of freedom
## Multiple R-squared:  0.0193, Adjusted R-squared:  0.01685 
## F-statistic: 7.874 on 1 and 400 DF,  p-value: 0.005262
## 
## 
## Response OPC :
## 
## Call:
## lm(formula = OPC ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.36419 -0.14716  0.00457  0.13405  0.89596 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.81985    0.01861 -97.790  < 2e-16 ***
## dxSCZ        0.08087    0.02743   2.948  0.00339 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2741 on 400 degrees of freedom
## Multiple R-squared:  0.02126,    Adjusted R-squared:  0.01882 
## F-statistic: 8.691 on 1 and 400 DF,  p-value: 0.003386
summary(lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID)))
## Response Exc :
## 
## Call:
## lm(formula = Exc ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.89166 -0.09513  0.05729  0.19200  0.60517 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      4.556e-01  1.438e-01   3.168  0.00166 **
## dxSCZ            2.288e-02  3.881e-02   0.590  0.55577   
## age              9.737e-03  5.428e-03   1.794  0.07360 . 
## age2            -8.868e-05  5.529e-05  -1.604  0.10952   
## sexM            -8.203e-03  3.746e-02  -0.219  0.82677   
## batchLieber_244 -7.559e-02  7.028e-02  -1.075  0.28281   
## batchLieber_289 -1.586e-02  7.126e-02  -0.223  0.82403   
## batchLieber_30  -1.037e-01  1.001e-01  -1.036  0.30083   
## batchLieber_315  8.656e-03  7.762e-02   0.112  0.91126   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3447 on 393 degrees of freedom
## Multiple R-squared:  0.02524,    Adjusted R-squared:  0.0054 
## F-statistic: 1.272 on 8 and 393 DF,  p-value: 0.2564
## 
## 
## Response Inh :
## 
## Call:
## lm(formula = Inh ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.220296 -0.054465  0.000495  0.054463  0.230232 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      6.029e-01  3.565e-02  16.912   <2e-16 ***
## dxSCZ            5.504e-03  9.621e-03   0.572   0.5676    
## age              5.937e-04  1.346e-03   0.441   0.6593    
## age2            -1.573e-05  1.371e-05  -1.147   0.2519    
## sexM             9.236e-03  9.286e-03   0.995   0.3206    
## batchLieber_244  1.610e-02  1.742e-02   0.924   0.3561    
## batchLieber_289  1.142e-02  1.767e-02   0.647   0.5183    
## batchLieber_30  -1.586e-02  2.480e-02  -0.639   0.5229    
## batchLieber_315 -3.839e-02  1.924e-02  -1.995   0.0467 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08546 on 393 degrees of freedom
## Multiple R-squared:  0.08422,    Adjusted R-squared:  0.06558 
## F-statistic: 4.518 on 8 and 393 DF,  p-value: 2.861e-05
## 
## 
## Response Astro :
## 
## Call:
## lm(formula = Astro ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48662 -0.08547  0.01077  0.09506  0.50878 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      4.929e-01  6.464e-02   7.625 1.85e-13 ***
## dxSCZ            2.162e-02  1.745e-02   1.239   0.2160    
## age              2.947e-03  2.440e-03   1.208   0.2278    
## age2            -3.570e-05  2.485e-05  -1.436   0.1517    
## sexM            -2.300e-02  1.684e-02  -1.366   0.1727    
## batchLieber_244 -2.219e-02  3.159e-02  -0.702   0.4829    
## batchLieber_289  1.247e-02  3.203e-02   0.389   0.6973    
## batchLieber_30  -3.146e-02  4.498e-02  -0.699   0.4847    
## batchLieber_315  6.515e-02  3.489e-02   1.867   0.0626 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.155 on 393 degrees of freedom
## Multiple R-squared:  0.05329,    Adjusted R-squared:  0.03402 
## F-statistic: 2.765 on 8 and 393 DF,  p-value: 0.005536
## 
## 
## Response Endo :
## 
## Call:
## lm(formula = Endo ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.24321 -0.05275 -0.00806  0.04276  0.44940 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -6.284e-01  3.558e-02 -17.665  < 2e-16 ***
## dxSCZ            1.565e-02  9.601e-03   1.630 0.103876    
## age             -2.784e-03  1.343e-03  -2.073 0.038822 *  
## age2             1.044e-05  1.368e-05   0.763 0.445679    
## sexM            -2.729e-02  9.267e-03  -2.945 0.003426 ** 
## batchLieber_244  1.509e-02  1.739e-02   0.868 0.385892    
## batchLieber_289  6.195e-02  1.763e-02   3.514 0.000492 ***
## batchLieber_30  -2.139e-02  2.475e-02  -0.864 0.388037    
## batchLieber_315  4.435e-02  1.920e-02   2.309 0.021447 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08528 on 393 degrees of freedom
## Multiple R-squared:  0.1482, Adjusted R-squared:  0.1309 
## F-statistic: 8.548 on 8 and 393 DF,  p-value: 9.473e-11
## 
## 
## Response Micro :
## 
## Call:
## lm(formula = Micro ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39012 -0.07686 -0.02074  0.06882  0.57314 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -5.756e-01  5.833e-02  -9.868  < 2e-16 ***
## dxSCZ           -2.616e-02  1.574e-02  -1.662   0.0974 .  
## age             -9.533e-03  2.202e-03  -4.330  1.9e-05 ***
## age2             5.201e-05  2.243e-05   2.319   0.0209 *  
## sexM             2.482e-02  1.519e-02   1.634   0.1031    
## batchLieber_244  4.813e-02  2.851e-02   1.688   0.0922 .  
## batchLieber_289  3.899e-02  2.891e-02   1.349   0.1781    
## batchLieber_30   6.268e-02  4.058e-02   1.544   0.1233    
## batchLieber_315  3.401e-02  3.149e-02   1.080   0.2807    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1398 on 393 degrees of freedom
## Multiple R-squared:  0.2514, Adjusted R-squared:  0.2361 
## F-statistic:  16.5 on 8 and 393 DF,  p-value: < 2.2e-16
## 
## 
## Response Oligo :
## 
## Call:
## lm(formula = Oligo ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76499 -0.16800  0.00177  0.14269  0.90854 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.498e+00  1.024e-01  14.624  < 2e-16 ***
## dxSCZ           -9.644e-02  2.765e-02  -3.488 0.000542 ***
## age              2.850e-03  3.867e-03   0.737 0.461515    
## age2             5.617e-06  3.939e-05   0.143 0.886683    
## sexM             2.242e-02  2.669e-02   0.840 0.401412    
## batchLieber_244 -3.302e-02  5.007e-02  -0.659 0.510038    
## batchLieber_289 -8.538e-02  5.077e-02  -1.682 0.093417 .  
## batchLieber_30   5.682e-02  7.128e-02   0.797 0.425814    
## batchLieber_315 -2.445e-01  5.530e-02  -4.421 1.27e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2456 on 393 degrees of freedom
## Multiple R-squared:  0.1417, Adjusted R-squared:  0.1242 
## F-statistic:  8.11 on 8 and 393 DF,  p-value: 3.717e-10
## 
## 
## Response OPC :
## 
## Call:
## lm(formula = OPC ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.37313 -0.14144 -0.00242  0.14589  0.87972 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.846e+00  1.120e-01 -16.471   <2e-16 ***
## dxSCZ            5.694e-02  3.024e-02   1.883   0.0604 .  
## age             -3.811e-03  4.229e-03  -0.901   0.3681    
## age2             7.204e-05  4.308e-05   1.672   0.0953 .  
## sexM             2.019e-03  2.919e-02   0.069   0.9449    
## batchLieber_244  5.147e-02  5.476e-02   0.940   0.3478    
## batchLieber_289 -2.360e-02  5.553e-02  -0.425   0.6710    
## batchLieber_30   5.286e-02  7.796e-02   0.678   0.4981    
## batchLieber_315  1.307e-01  6.048e-02   2.161   0.0313 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2686 on 393 degrees of freedom
## Multiple R-squared:  0.07684,    Adjusted R-squared:  0.05805 
## F-statistic: 4.089 on 8 and 393 DF,  p-value: 0.0001072

Check confounding of batch for Exc and Oligo

## Check per-batch effect for Exc and Oligo

summary(lm(Micro ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch == "Lieber_289")))
## 
## Call:
## lm(formula = Micro ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch == "Lieber_289"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39670 -0.07215  0.00550  0.06004  0.53388 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.328e-01  5.567e-02  -9.571 3.77e-16 ***
## dxSCZ        1.114e-04  2.564e-02   0.004  0.99654    
## age         -1.002e-02  2.166e-03  -4.626 1.02e-05 ***
## age2         6.073e-05  2.176e-05   2.790  0.00621 ** 
## sexM         5.529e-04  2.496e-02   0.022  0.98237    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1258 on 110 degrees of freedom
## Multiple R-squared:  0.3358, Adjusted R-squared:  0.3117 
## F-statistic:  13.9 on 4 and 110 DF,  p-value: 3.276e-09
summary(lm(Micro ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch == "Lieber_315")))
## 
## Call:
## lm(formula = Micro ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch == "Lieber_315"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.25066 -0.06464 -0.00835  0.06299  0.41720 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.435e-01  5.027e-02 -12.800  < 2e-16 ***
## dxSCZ        4.778e-03  3.301e-02   0.145  0.88534    
## age         -7.058e-03  2.327e-03  -3.033  0.00344 ** 
## age2         2.982e-05  3.171e-05   0.940  0.35039    
## sexM         6.238e-02  3.296e-02   1.893  0.06271 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1198 on 67 degrees of freedom
## Multiple R-squared:  0.4637, Adjusted R-squared:  0.4317 
## F-statistic: 14.48 on 4 and 67 DF,  p-value: 1.421e-08
print("N.B. Lieber_104 has CONTROLS ONLY")
## [1] "N.B. Lieber_104 has CONTROLS ONLY"
summary(lm(Micro ~ age + age2 + sex, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch == "Lieber_104")))
## 
## Call:
## lm(formula = Micro ~ age + age2 + sex, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch == "Lieber_104"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41544 -0.06913  0.00189  0.06950  0.44253 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.711e-01  5.488e-02  -8.584 5.95e-11 ***
## age         -1.117e-02  3.263e-03  -3.423  0.00135 ** 
## age2         6.016e-05  4.239e-05   1.419  0.16288    
## sexM        -6.356e-02  4.335e-02  -1.466  0.14973    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1493 on 44 degrees of freedom
## Multiple R-squared:  0.5886, Adjusted R-squared:  0.5605 
## F-statistic: 20.98 on 3 and 44 DF,  p-value: 1.372e-08
print("N.B. Lieber_30 has SCHIZOPHRENIA PATIENTS ONLY")
## [1] "N.B. Lieber_30 has SCHIZOPHRENIA PATIENTS ONLY"
summary(lm(Micro ~ age + age2 + sex, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch == "Lieber_30")))
## 
## Call:
## lm(formula = Micro ~ age + age2 + sex, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch == "Lieber_30"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.17713 -0.07667 -0.05847  0.04542  0.43153 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.6735907  0.3952967  -1.704    0.102
## age          0.0027987  0.0172763   0.162    0.873
## age2        -0.0001306  0.0001841  -0.710    0.485
## sexM        -0.0004509  0.0717097  -0.006    0.995
## 
## Residual standard error: 0.1586 on 22 degrees of freedom
## Multiple R-squared:  0.3834, Adjusted R-squared:  0.2993 
## F-statistic: 4.559 on 3 and 22 DF,  p-value: 0.01248
summary(lm(Micro ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch == "Lieber_244")))
## 
## Call:
## lm(formula = Micro ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch == "Lieber_244"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.29803 -0.06965 -0.01315  0.05126  0.60667 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.249e-01  3.578e-02 -14.668  < 2e-16 ***
## dxSCZ       -5.587e-02  2.113e-02  -2.645   0.0088 ** 
## age         -9.420e-03  2.033e-03  -4.634 6.31e-06 ***
## age2         4.782e-05  2.807e-05   1.704   0.0899 .  
## sexM         4.416e-02  2.075e-02   2.128   0.0345 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1341 on 209 degrees of freedom
## Multiple R-squared:  0.4798, Adjusted R-squared:  0.4698 
## F-statistic: 48.19 on 4 and 209 DF,  p-value: < 2.2e-16
summary(lm(Micro ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", "Lieber_244", "Lieber_315"))))
## 
## Call:
## lm(formula = Micro ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", 
##     "Lieber_244", "Lieber_315")))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39166 -0.07472 -0.01655  0.06350  0.56942 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -5.280e-01  2.497e-02 -21.142  < 2e-16 ***
## dxSCZ           -2.908e-02  1.457e-02  -1.995   0.0467 *  
## age             -9.682e-03  1.125e-03  -8.604  < 2e-16 ***
## age2             5.650e-05  1.372e-05   4.120 4.63e-05 ***
## sexM             2.833e-02  1.425e-02   1.988   0.0475 *  
## batchLieber_289 -1.395e-02  1.583e-02  -0.881   0.3787    
## batchLieber_315 -2.622e-02  1.787e-02  -1.468   0.1430    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1299 on 394 degrees of freedom
## Multiple R-squared:  0.4525, Adjusted R-squared:  0.4442 
## F-statistic: 54.28 on 6 and 394 DF,  p-value: < 2.2e-16
summary(lm(Micro ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", "Lieber_244", "Lieber_315", "Lieber_30"))))
## 
## Call:
## lm(formula = Micro ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", 
##     "Lieber_244", "Lieber_315", "Lieber_30")))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39230 -0.07362 -0.01567  0.06141  0.57427 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -5.292e-01  2.518e-02 -21.018  < 2e-16 ***
## dxSCZ           -2.703e-02  1.477e-02  -1.830 0.067927 .  
## age             -9.485e-03  1.139e-03  -8.328 1.18e-15 ***
## age2             5.213e-05  1.382e-05   3.773 0.000184 ***
## sexM             2.897e-02  1.405e-02   2.062 0.039798 *  
## batchLieber_289 -1.166e-02  1.605e-02  -0.726 0.468092    
## batchLieber_30   1.178e-02  2.876e-02   0.409 0.682422    
## batchLieber_315 -2.509e-02  1.815e-02  -1.382 0.167639    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1321 on 419 degrees of freedom
## Multiple R-squared:  0.4411, Adjusted R-squared:  0.4317 
## F-statistic: 47.24 on 7 and 419 DF,  p-value: < 2.2e-16
summary(lm(Micro ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", "Lieber_244", "Lieber_315", "Lieber_30", "Lieber_104"))))
## 
## Call:
## lm(formula = Micro ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", 
##     "Lieber_244", "Lieber_315", "Lieber_30", "Lieber_104")))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39040 -0.07488 -0.01407  0.06228  0.56968 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -5.477e-01  2.704e-02 -20.255  < 2e-16 ***
## dxSCZ           -2.514e-02  1.477e-02  -1.701   0.0895 .  
## age             -9.866e-03  1.071e-03  -9.210  < 2e-16 ***
## age2             5.427e-05  1.316e-05   4.123 4.42e-05 ***
## sexM             1.775e-02  1.341e-02   1.324   0.1861    
## batchLieber_244  3.640e-02  2.278e-02   1.598   0.1108    
## batchLieber_289  2.585e-02  2.416e-02   1.070   0.2853    
## batchLieber_30   4.897e-02  3.613e-02   1.355   0.1760    
## batchLieber_315  1.164e-02  2.595e-02   0.448   0.6540    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1343 on 466 degrees of freedom
## Multiple R-squared:  0.4566, Adjusted R-squared:  0.4473 
## F-statistic: 48.94 on 8 and 466 DF,  p-value: < 2.2e-16
summary(lm(Micro ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", "Lieber_244", "Lieber_315", "Lieber_30", "Lieber_104"))))
## 
## Call:
## lm(formula = Micro ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", 
##     "Lieber_244", "Lieber_315", "Lieber_30", "Lieber_104")))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41186 -0.07334 -0.01357  0.06290  0.57462 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.271e-01  2.221e-02 -23.727  < 2e-16 ***
## dxSCZ       -1.690e-02  1.364e-02  -1.240 0.215696    
## age         -9.640e-03  1.063e-03  -9.069  < 2e-16 ***
## age2         5.018e-05  1.293e-05   3.880 0.000119 ***
## sexM         2.140e-02  1.323e-02   1.618 0.106399    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1343 on 470 degrees of freedom
## Multiple R-squared:  0.4519, Adjusted R-squared:  0.4472 
## F-statistic: 96.87 on 4 and 470 DF,  p-value: < 2.2e-16
summary(lm(Oligo ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch == "Lieber_289")))
## 
## Call:
## lm(formula = Oligo ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch == "Lieber_289"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.47414 -0.14351 -0.02345  0.12643  0.84804 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.716e-01  1.083e-01   8.974 8.72e-15 ***
## dxSCZ       -1.034e-02  4.986e-02  -0.207  0.83611    
## age          1.817e-02  4.213e-03   4.313 3.52e-05 ***
## age2        -1.332e-04  4.232e-05  -3.146  0.00213 ** 
## sexM         4.765e-02  4.855e-02   0.982  0.32845    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2446 on 110 degrees of freedom
## Multiple R-squared:  0.216,  Adjusted R-squared:  0.1874 
## F-statistic: 7.575 on 4 and 110 DF,  p-value: 1.988e-05
summary(lm(Oligo ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch == "Lieber_315")))
## 
## Call:
## lm(formula = Oligo ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch == "Lieber_315"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.70115 -0.10991  0.00296  0.15360  0.48191 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.708e-01  1.021e-01   8.531 2.65e-12 ***
## dxSCZ       -1.460e-01  6.702e-02  -2.179 0.032884 *  
## age          1.803e-02  4.724e-03   3.816 0.000298 ***
## age2        -1.242e-04  6.439e-05  -1.929 0.057950 .  
## sexM         1.209e-02  6.692e-02   0.181 0.857207    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2432 on 67 degrees of freedom
## Multiple R-squared:  0.3783, Adjusted R-squared:  0.3412 
## F-statistic: 10.19 on 4 and 67 DF,  p-value: 1.662e-06
print("N.B. Lieber_104 has CONTROLS ONLY")
## [1] "N.B. Lieber_104 has CONTROLS ONLY"
summary(lm(Oligo ~ age + age2 + sex, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch == "Lieber_104")))
## 
## Call:
## lm(formula = Oligo ~ age + age2 + sex, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch == "Lieber_104"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.63239 -0.19849 -0.00609  0.11907  1.07282 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.174e+00  1.146e-01  10.248 3.13e-13 ***
## age          1.548e-02  6.812e-03   2.273    0.028 *  
## age2        -9.495e-05  8.849e-05  -1.073    0.289    
## sexM        -4.269e-02  9.050e-02  -0.472    0.639    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3116 on 44 degrees of freedom
## Multiple R-squared:  0.3465, Adjusted R-squared:  0.302 
## F-statistic: 7.777 on 3 and 44 DF,  p-value: 0.0002838
print("N.B. Lieber_30 has SCHIZOPHRENIA PATIENTS ONLY")
## [1] "N.B. Lieber_30 has SCHIZOPHRENIA PATIENTS ONLY"
summary(lm(Oligo ~ age + age2 + sex, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch == "Lieber_30")))
## 
## Call:
## lm(formula = Oligo ~ age + age2 + sex, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch == "Lieber_30"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.29493 -0.15638 -0.00970  0.08445  0.56092 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  1.487e+00  5.511e-01   2.698   0.0131 *
## age          7.076e-03  2.409e-02   0.294   0.7717  
## age2        -9.693e-05  2.567e-04  -0.378   0.7093  
## sexM         3.905e-02  9.998e-02   0.391   0.6998  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2212 on 22 degrees of freedom
## Multiple R-squared:  0.03444,    Adjusted R-squared:  -0.09723 
## F-statistic: 0.2616 on 3 and 22 DF,  p-value: 0.8523
summary(lm(Oligo ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch == "Lieber_244")))
## 
## Call:
## lm(formula = Oligo ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch == "Lieber_244"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84089 -0.21869 -0.00294  0.16638  0.83055 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.186e+00  7.438e-02  15.945  < 2e-16 ***
## dxSCZ       -1.471e-01  4.392e-02  -3.349 0.000962 ***
## age          1.978e-02  4.225e-03   4.682  5.1e-06 ***
## age2        -1.958e-04  5.835e-05  -3.356 0.000940 ***
## sexM         4.404e-03  4.314e-02   0.102 0.918770    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2788 on 209 degrees of freedom
## Multiple R-squared:  0.1608, Adjusted R-squared:  0.1447 
## F-statistic: 10.01 on 4 and 209 DF,  p-value: 1.981e-07
summary(lm(Oligo ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", "Lieber_244", "Lieber_315"))))
## 
## Call:
## lm(formula = Oligo ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", 
##     "Lieber_244", "Lieber_315")))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.81279 -0.18552 -0.00944  0.15322  0.89923 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.177e+00  5.095e-02  23.089  < 2e-16 ***
## dxSCZ           -1.103e-01  2.973e-02  -3.710 0.000237 ***
## age              1.623e-02  2.296e-03   7.068 7.19e-12 ***
## age2            -1.248e-04  2.798e-05  -4.460 1.07e-05 ***
## sexM             1.572e-02  2.908e-02   0.540 0.589200    
## batchLieber_289 -6.742e-02  3.229e-02  -2.088 0.037474 *  
## batchLieber_315 -2.450e-01  3.646e-02  -6.720 6.37e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2651 on 394 degrees of freedom
## Multiple R-squared:  0.2497, Adjusted R-squared:  0.2382 
## F-statistic: 21.85 on 6 and 394 DF,  p-value: < 2.2e-16
summary(lm(Oligo ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", "Lieber_244", "Lieber_315", "Lieber_30"))))
## 
## Call:
## lm(formula = Oligo ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", 
##     "Lieber_244", "Lieber_315", "Lieber_30")))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.80811 -0.18776 -0.00251  0.14508  0.90303 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.175e+00  5.010e-02  23.453  < 2e-16 ***
## dxSCZ           -1.068e-01  2.939e-02  -3.634 0.000313 ***
## age              1.634e-02  2.266e-03   7.208 2.66e-12 ***
## age2            -1.289e-04  2.749e-05  -4.687 3.75e-06 ***
## sexM             1.988e-02  2.795e-02   0.711 0.477451    
## batchLieber_289 -6.412e-02  3.194e-02  -2.007 0.045340 *  
## batchLieber_30   7.544e-02  5.722e-02   1.318 0.188100    
## batchLieber_315 -2.433e-01  3.612e-02  -6.737 5.37e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2628 on 419 degrees of freedom
## Multiple R-squared:  0.247,  Adjusted R-squared:  0.2344 
## F-statistic: 19.63 on 7 and 419 DF,  p-value: < 2.2e-16
summary(lm(Oligo ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", "Lieber_244", "Lieber_315", "Lieber_30", "Lieber_104"))))
## 
## Call:
## lm(formula = Oligo ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", 
##     "Lieber_244", "Lieber_315", "Lieber_30", "Lieber_104")))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.81720 -0.18558 -0.00756  0.14663  1.06419 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.175e+00  5.387e-02  21.821  < 2e-16 ***
## dxSCZ           -1.127e-01  2.943e-02  -3.828 0.000147 ***
## age              1.644e-02  2.134e-03   7.703 8.06e-14 ***
## age2            -1.259e-04  2.622e-05  -4.800 2.14e-06 ***
## sexM             1.266e-02  2.671e-02   0.474 0.635830    
## batchLieber_244 -1.688e-03  4.538e-02  -0.037 0.970349    
## batchLieber_289 -7.069e-02  4.813e-02  -1.469 0.142591    
## batchLieber_30   7.424e-02  7.198e-02   1.031 0.302892    
## batchLieber_315 -2.475e-01  5.169e-02  -4.787 2.28e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2675 on 466 degrees of freedom
## Multiple R-squared:  0.2603, Adjusted R-squared:  0.2476 
## F-statistic:  20.5 on 8 and 466 DF,  p-value: < 2.2e-16
summary(lm(Oligo ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", "Lieber_244", "Lieber_315", "Lieber_30", "Lieber_104"))))
## 
## Call:
## lm(formula = Oligo ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", 
##     "Lieber_244", "Lieber_315", "Lieber_30", "Lieber_104")))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8684 -0.1949 -0.0080  0.1660  1.1093 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.130e+00  4.652e-02  24.286  < 2e-16 ***
## dxSCZ       -1.017e-01  2.855e-02  -3.562 0.000405 ***
## age          1.662e-02  2.226e-03   7.466 4.06e-13 ***
## age2        -1.346e-04  2.708e-05  -4.970 9.41e-07 ***
## sexM         1.390e-02  2.770e-02   0.502 0.616045    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2812 on 470 degrees of freedom
## Multiple R-squared:  0.1755, Adjusted R-squared:  0.1685 
## F-statistic: 25.01 on 4 and 470 DF,  p-value: < 2.2e-16
summary(lm(OPC ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch == "Lieber_289")))
## 
## Call:
## lm(formula = OPC ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch == "Lieber_289"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.30576 -0.11601  0.01649  0.16040  0.73460 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.805e+00  1.227e-01 -14.702   <2e-16 ***
## dxSCZ       -3.669e-02  5.653e-02  -0.649   0.5177    
## age         -2.984e-03  4.776e-03  -0.625   0.5334    
## age2         7.436e-05  4.798e-05   1.550   0.1241    
## sexM        -1.044e-01  5.503e-02  -1.898   0.0604 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2773 on 110 degrees of freedom
## Multiple R-squared:  0.126,  Adjusted R-squared:  0.09423 
## F-statistic: 3.965 on 4 and 110 DF,  p-value: 0.00481
summary(lm(OPC ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch == "Lieber_315")))
## 
## Call:
## lm(formula = OPC ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch == "Lieber_315"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.45403 -0.15057 -0.02209  0.15028  0.80844 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.704e+00  9.037e-02 -18.859   <2e-16 ***
## dxSCZ        3.976e-02  5.933e-02   0.670    0.505    
## age         -4.509e-03  4.183e-03  -1.078    0.285    
## age2         7.761e-05  5.701e-05   1.361    0.178    
## sexM         2.934e-02  5.925e-02   0.495    0.622    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2153 on 67 degrees of freedom
## Multiple R-squared:  0.04568,    Adjusted R-squared:  -0.01129 
## F-statistic: 0.8018 on 4 and 67 DF,  p-value: 0.5283
print("N.B. Lieber_104 has CONTROLS ONLY")
## [1] "N.B. Lieber_104 has CONTROLS ONLY"
summary(lm(OPC ~ age + age2 + sex, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch == "Lieber_104")))
## 
## Call:
## lm(formula = OPC ~ age + age2 + sex, data = ctp.clr.1e3 %>% filter(study == 
##     "LIBD") %>% filter(batch == "Lieber_104"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.63144 -0.14533  0.02343  0.12604  0.49079 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.743e+00  9.298e-02 -18.747   <2e-16 ***
## age         -1.046e-02  5.529e-03  -1.891   0.0652 .  
## age2         1.634e-04  7.182e-05   2.275   0.0278 *  
## sexM        -7.200e-02  7.345e-02  -0.980   0.3323    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2529 on 44 degrees of freedom
## Multiple R-squared:  0.1538, Adjusted R-squared:  0.09607 
## F-statistic: 2.665 on 3 and 44 DF,  p-value: 0.05944
print("N.B. Lieber_30 has SCHIZOPHRENIA PATIENTS ONLY")
## [1] "N.B. Lieber_30 has SCHIZOPHRENIA PATIENTS ONLY"
summary(lm(OPC ~ age + age2 + sex, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch == "Lieber_30")))
## 
## Call:
## lm(formula = OPC ~ age + age2 + sex, data = ctp.clr.1e3 %>% filter(study == 
##     "LIBD") %>% filter(batch == "Lieber_30"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.33851 -0.14515 -0.01766  0.02969  0.65265 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -1.9325811  0.5828367  -3.316  0.00314 **
## age          0.0148115  0.0254726   0.581  0.56683   
## age2        -0.0002005  0.0002714  -0.739  0.46796   
## sexM        -0.0562361  0.1057309  -0.532  0.60014   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2339 on 22 degrees of freedom
## Multiple R-squared:  0.06177,    Adjusted R-squared:  -0.06618 
## F-statistic: 0.4828 on 3 and 22 DF,  p-value: 0.6976
summary(lm(OPC ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch == "Lieber_244")))
## 
## Call:
## lm(formula = OPC ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch == "Lieber_244"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8069 -0.1482  0.0037  0.1368  0.8977 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.929e+00  7.314e-02 -26.377  < 2e-16 ***
## dxSCZ        1.313e-01  4.319e-02   3.041  0.00266 ** 
## age         -8.898e-04  4.155e-03  -0.214  0.83065    
## age2         3.147e-05  5.738e-05   0.549  0.58392    
## sexM         6.750e-02  4.242e-02   1.591  0.11303    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2741 on 209 degrees of freedom
## Multiple R-squared:  0.0834, Adjusted R-squared:  0.06586 
## F-statistic: 4.754 on 4 and 209 DF,  p-value: 0.001085
summary(lm(OPC ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", "Lieber_244", "Lieber_315"))))
## 
## Call:
## lm(formula = OPC ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", 
##     "Lieber_244", "Lieber_315")))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.39371 -0.13318  0.00615  0.14620  0.89803 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.847e+00  5.143e-02 -35.919  < 2e-16 ***
## dxSCZ            6.797e-02  3.001e-02   2.265  0.02405 *  
## age             -3.144e-03  2.318e-03  -1.357  0.17568    
## age2             6.883e-05  2.825e-05   2.437  0.01526 *  
## sexM             1.540e-02  2.936e-02   0.525  0.60012    
## batchLieber_289 -5.624e-02  3.260e-02  -1.725  0.08526 .  
## batchLieber_315  1.025e-01  3.680e-02   2.785  0.00562 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2676 on 394 degrees of freedom
## Multiple R-squared:  0.09117,    Adjusted R-squared:  0.07734 
## F-statistic: 6.588 on 6 and 394 DF,  p-value: 1.198e-06
summary(lm(OPC ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", "Lieber_244", "Lieber_315", "Lieber_30"))))
## 
## Call:
## lm(formula = OPC ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", 
##     "Lieber_244", "Lieber_315", "Lieber_30")))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.38816 -0.13186  0.00542  0.14419  0.89805 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.848e+00  5.069e-02 -36.449  < 2e-16 ***
## dxSCZ            7.057e-02  2.973e-02   2.373  0.01807 *  
## age             -2.872e-03  2.293e-03  -1.252  0.21120    
## age2             6.278e-05  2.782e-05   2.257  0.02454 *  
## sexM             1.447e-02  2.828e-02   0.512  0.60912    
## batchLieber_289 -5.323e-02  3.232e-02  -1.647  0.10031    
## batchLieber_30   1.045e-02  5.790e-02   0.180  0.85689    
## batchLieber_315  1.039e-01  3.655e-02   2.843  0.00468 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2659 on 419 degrees of freedom
## Multiple R-squared:  0.08514,    Adjusted R-squared:  0.06986 
## F-statistic: 5.571 on 7 and 419 DF,  p-value: 3.763e-06
summary(lm(OPC ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", "Lieber_244", "Lieber_315", "Lieber_30", "Lieber_104"))))
## 
## Call:
## lm(formula = OPC ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", 
##     "Lieber_244", "Lieber_315", "Lieber_30", "Lieber_104")))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.38908 -0.13300  0.00666  0.14377  0.90483 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.848e+00  5.330e-02 -34.678  < 2e-16 ***
## dxSCZ            6.994e-02  2.912e-02   2.402  0.01670 *  
## age             -3.851e-03  2.111e-03  -1.824  0.06885 .  
## age2             7.521e-05  2.594e-05   2.899  0.00392 ** 
## sexM             3.734e-03  2.643e-02   0.141  0.88770    
## batchLieber_244  2.396e-02  4.490e-02   0.534  0.59385    
## batchLieber_289 -3.185e-02  4.763e-02  -0.669  0.50403    
## batchLieber_30   3.609e-02  7.122e-02   0.507  0.61256    
## batchLieber_315  1.264e-01  5.115e-02   2.471  0.01383 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2647 on 466 degrees of freedom
## Multiple R-squared:  0.08816,    Adjusted R-squared:  0.0725 
## F-statistic: 5.632 on 8 and 466 DF,  p-value: 7.778e-07
summary(lm(OPC ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", "Lieber_244", "Lieber_315", "Lieber_30", "Lieber_104"))))
## 
## Call:
## lm(formula = OPC ~ dx + age + age2 + sex, data = ctp.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(batch %in% c("Lieber_289", 
##     "Lieber_244", "Lieber_315", "Lieber_30", "Lieber_104")))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.44013 -0.13243  0.00328  0.14222  0.90345 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.825e+00  4.434e-02 -41.159  < 2e-16 ***
## dxSCZ        7.634e-02  2.722e-02   2.805  0.00524 ** 
## age         -3.647e-03  2.122e-03  -1.719  0.08629 .  
## age2         6.935e-05  2.582e-05   2.686  0.00749 ** 
## sexM         7.149e-03  2.641e-02   0.271  0.78674    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2681 on 470 degrees of freedom
## Multiple R-squared:  0.05666,    Adjusted R-squared:  0.04863 
## F-statistic: 7.057 on 4 and 470 DF,  p-value: 1.596e-05

8.1.1.3 ROSMAP

summary(lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ dx, data = ctp.clr.1e3 %>% filter(study == "ROSMAP")))
## Response Exc :
## 
## Call:
## lm(formula = Exc ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "ROSMAP"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5375 -0.1346 -0.0053  0.1278  0.6560 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.16747    0.01161  100.60  < 2e-16 ***
## dxCTL       -0.04242    0.01520   -2.79  0.00541 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.201 on 717 degrees of freedom
## Multiple R-squared:  0.01074,    Adjusted R-squared:  0.00936 
## F-statistic: 7.784 on 1 and 717 DF,  p-value: 0.00541
## 
## 
## Response Inh :
## 
## Call:
## lm(formula = Inh ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.34983 -0.06825 -0.01172  0.06145  0.37696 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.836294   0.005961 140.292  < 2e-16 ***
## dxCTL       -0.025514   0.007809  -3.267  0.00114 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1032 on 717 degrees of freedom
## Multiple R-squared:  0.01467,    Adjusted R-squared:  0.0133 
## F-statistic: 10.68 on 1 and 717 DF,  p-value: 0.001137
## 
## 
## Response Astro :
## 
## Call:
## lm(formula = Astro ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51884 -0.11213 -0.01432  0.10215  0.61453 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.76711    0.01001  76.663   <2e-16 ***
## dxCTL        0.01667    0.01311   1.272    0.204    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1733 on 717 degrees of freedom
## Multiple R-squared:  0.002251,   Adjusted R-squared:  0.0008596 
## F-statistic: 1.618 on 1 and 717 DF,  p-value: 0.2038
## 
## 
## Response Endo :
## 
## Call:
## lm(formula = Endo ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.31397 -0.05869  0.00151  0.06707  0.42249 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.916359   0.005554 -165.00  < 2e-16 ***
## dxCTL        0.042850   0.007275    5.89 5.94e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09619 on 717 degrees of freedom
## Multiple R-squared:  0.04615,    Adjusted R-squared:  0.04482 
## F-statistic: 34.69 on 1 and 717 DF,  p-value: 5.941e-09
## 
## 
## Response Micro :
## 
## Call:
## lm(formula = Micro ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.65997 -0.09220  0.00041  0.09769  0.60584 
## 
## Coefficients:
##              Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) -1.138531   0.009102 -125.088   <2e-16 ***
## dxCTL        0.022141   0.011923    1.857   0.0637 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1576 on 717 degrees of freedom
## Multiple R-squared:  0.004786,   Adjusted R-squared:  0.003398 
## F-statistic: 3.448 on 1 and 717 DF,  p-value: 0.06372
## 
## 
## Response Oligo :
## 
## Call:
## lm(formula = Oligo ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.81666 -0.14737 -0.01577  0.12565  0.86690 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.336109   0.012277 108.826   <2e-16 ***
## dxCTL       0.008825   0.016083   0.549    0.583    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2127 on 717 degrees of freedom
## Multiple R-squared:  0.0004197,  Adjusted R-squared:  -0.0009744 
## F-statistic: 0.3011 on 1 and 717 DF,  p-value: 0.5834
## 
## 
## Response OPC :
## 
## Call:
## lm(formula = OPC ~ dx, data = ctp.clr.1e3 %>% filter(study == 
##     "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.85349 -0.15899  0.09949  0.26302  0.71132 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.05209    0.02258 -90.885   <2e-16 ***
## dxCTL       -0.02256    0.02958  -0.763    0.446    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3911 on 717 degrees of freedom
## Multiple R-squared:  0.0008106,  Adjusted R-squared:  -0.000583 
## F-statistic: 0.5817 on 1 and 717 DF,  p-value: 0.4459
summary(lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% filter(study == "ROSMAP")))
## Response Exc :
## 
## Call:
## lm(formula = Exc ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.62110 -0.11409 -0.00245  0.11441  0.59751 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.8511518  1.7594377  -0.484  0.62870    
## dxCTL        -0.0444254  0.0149808  -2.965  0.00312 ** 
## age           0.0491061  0.0426217   1.152  0.24965    
## age2         -0.0002837  0.0002571  -1.103  0.27022    
## sexM         -0.0441214  0.0150221  -2.937  0.00342 ** 
## batchROSMAP1 -0.1310765  0.0146939  -8.920  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1896 on 712 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1248, Adjusted R-squared:  0.1186 
## F-statistic:  20.3 on 5 and 712 DF,  p-value: < 2.2e-16
## 
## 
## Response Inh :
## 
## Call:
## lm(formula = Inh ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35631 -0.07035 -0.01002  0.05842  0.36748 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.050e+00  9.564e-01   1.098 0.272552    
## dxCTL        -2.834e-02  8.143e-03  -3.480 0.000533 ***
## age          -4.418e-03  2.317e-02  -0.191 0.848817    
## age2          2.384e-05  1.398e-04   0.171 0.864649    
## sexM          8.513e-03  8.166e-03   1.043 0.297492    
## batchROSMAP1 -1.922e-02  7.987e-03  -2.407 0.016343 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1031 on 712 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.02497,    Adjusted R-squared:  0.01812 
## F-statistic: 3.647 on 5 and 712 DF,  p-value: 0.002893
## 
## 
## Response Astro :
## 
## Call:
## lm(formula = Astro ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.52432 -0.09742  0.00032  0.09707  0.50874 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.9243318  1.4388027  -1.337   0.1815    
## dxCTL         0.0027631  0.0122508   0.226   0.8216    
## age           0.0685744  0.0348545   1.967   0.0495 *  
## age2         -0.0004177  0.0002103  -1.987   0.0474 *  
## sexM          0.0092936  0.0122845   0.757   0.4496    
## batchROSMAP1 -0.1603124  0.0120162 -13.341   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1551 on 712 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2069, Adjusted R-squared:  0.2013 
## F-statistic: 37.15 on 5 and 712 DF,  p-value: < 2.2e-16
## 
## 
## Response Endo :
## 
## Call:
## lm(formula = Endo ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.32014 -0.05805  0.00400  0.06465  0.39716 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -6.125e-01  8.802e-01  -0.696  0.48669    
## dxCTL         3.930e-02  7.494e-03   5.244 2.07e-07 ***
## age          -5.096e-03  2.132e-02  -0.239  0.81118    
## age2          2.165e-05  1.286e-04   0.168  0.86639    
## sexM         -3.065e-02  7.515e-03  -4.079 5.03e-05 ***
## batchROSMAP1 -1.995e-02  7.351e-03  -2.714  0.00681 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09485 on 712 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.07886,    Adjusted R-squared:  0.07239 
## F-statistic: 12.19 on 5 and 712 DF,  p-value: 2.36e-11
## 
## 
## Response Micro :
## 
## Call:
## lm(formula = Micro ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.68648 -0.09009 -0.00177  0.09708  0.62421 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   1.7462917  1.4522618   1.202  0.22958   
## dxCTL         0.0162101  0.0123654   1.311  0.19031   
## age          -0.0676964  0.0351805  -1.924  0.05472 . 
## age2          0.0003945  0.0002123   1.859  0.06350 . 
## sexM          0.0333366  0.0123995   2.689  0.00734 **
## batchROSMAP1  0.0027800  0.0121286   0.229  0.81877   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1565 on 712 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.02601,    Adjusted R-squared:  0.01917 
## F-statistic: 3.802 on 5 and 712 DF,  p-value: 0.002093
## 
## 
## Response Oligo :
## 
## Call:
## lm(formula = Oligo ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.77402 -0.13535 -0.01136  0.12072  0.80663 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.262e+00  1.922e+00   0.657    0.512    
## dxCTL         9.959e-04  1.637e-02   0.061    0.951    
## age           4.090e-03  4.657e-02   0.088    0.930    
## age2         -2.879e-05  2.810e-04  -0.102    0.918    
## sexM          1.237e-02  1.641e-02   0.754    0.451    
## batchROSMAP1 -1.006e-01  1.605e-02  -6.268 6.33e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2072 on 712 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.05397,    Adjusted R-squared:  0.04733 
## F-statistic: 8.124 on 5 and 712 DF,  p-value: 1.808e-07
## 
## 
## Response OPC :
## 
## Call:
## lm(formula = OPC ~ dx + age + age2 + sex + batch, data = ctp.clr.1e3 %>% 
##     filter(study == "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.57100 -0.12318  0.05148  0.19277  0.83712 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.6706796  3.0883517  -0.217    0.828    
## dxCTL         0.0134924  0.0262960   0.513    0.608    
## age          -0.0445606  0.0748142  -0.596    0.552    
## age2          0.0002903  0.0004514   0.643    0.520    
## sexM          0.0112604  0.0263684   0.427    0.669    
## batchROSMAP1  0.4284120  0.0257924  16.610   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3328 on 712 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2813, Adjusted R-squared:  0.2763 
## F-statistic: 55.74 on 5 and 712 DF,  p-value: < 2.2e-16

8.1.2 clr, adjustment for oligodendrocytes

8.1.2.1 ASDbrain

summary(lm(cbind(Exc, Inh, Astro, Endo, Micro, OPC) ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID)))
## Response Exc :
## 
## Call:
## lm(formula = Exc ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == 
##     "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.32825 -0.08405  0.02557  0.08385  0.23387 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.91393    0.02486  36.770  < 2e-16 ***
## dxCTL        0.11309    0.03643   3.104  0.00299 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1384 on 56 degrees of freedom
## Multiple R-squared:  0.1468, Adjusted R-squared:  0.1316 
## F-statistic: 9.637 on 1 and 56 DF,  p-value: 0.002988
## 
## 
## Response Inh :
## 
## Call:
## lm(formula = Inh ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == 
##     "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.23197 -0.09340 -0.02429  0.04625  0.45290 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.92788    0.02606  35.611   <2e-16 ***
## dxCTL       -0.01865    0.03819  -0.488    0.627    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1451 on 56 degrees of freedom
## Multiple R-squared:  0.004241,   Adjusted R-squared:  -0.01354 
## F-statistic: 0.2385 on 1 and 56 DF,  p-value: 0.6272
## 
## 
## Response Astro :
## 
## Call:
## lm(formula = Astro ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == 
##     "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.57292 -0.08789  0.03716  0.14975  0.30203 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.15600    0.03468   4.499 3.49e-05 ***
## dxCTL        0.01812    0.05082   0.357    0.723    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1931 on 56 degrees of freedom
## Multiple R-squared:  0.002265,   Adjusted R-squared:  -0.01555 
## F-statistic: 0.1271 on 1 and 56 DF,  p-value: 0.7228
## 
## 
## Response Endo :
## 
## Call:
## lm(formula = Endo ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == 
##     "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.121394 -0.057834 -0.001869  0.043912  0.177785 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.61465    0.01212 -50.702   <2e-16 ***
## dxCTL       -0.01163    0.01777  -0.655    0.515    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0675 on 56 degrees of freedom
## Multiple R-squared:  0.007594,   Adjusted R-squared:  -0.01013 
## F-statistic: 0.4285 on 1 and 56 DF,  p-value: 0.5154
## 
## 
## Response Micro :
## 
## Call:
## lm(formula = Micro ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == 
##     "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.23966 -0.06034 -0.01490  0.05979  0.26060 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.54527    0.01602  -34.03  < 2e-16 ***
## dxCTL       -0.07422    0.02349   -3.16  0.00254 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08922 on 56 degrees of freedom
## Multiple R-squared:  0.1513, Adjusted R-squared:  0.1362 
## F-statistic: 9.987 on 1 and 56 DF,  p-value: 0.002543
## 
## 
## Response OPC :
## 
## Call:
## lm(formula = OPC ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == 
##     "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.14102 -0.04735 -0.01266  0.03265  0.23185 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.83789    0.01265  -66.22   <2e-16 ***
## dxCTL       -0.02671    0.01855   -1.44    0.155    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07045 on 56 degrees of freedom
## Multiple R-squared:  0.03572,    Adjusted R-squared:  0.0185 
## F-statistic: 2.074 on 1 and 56 DF,  p-value: 0.1554
summary(lm(cbind(Exc, Inh, Astro, Endo, Micro, OPC) ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID)))
## Response Exc :
## 
## Call:
## lm(formula = Exc ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% 
##     filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35643 -0.07457  0.04598  0.09263  0.19777 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     6.975e-01  1.360e-01   5.127 4.41e-06 ***
## dxCTL           1.004e-01  3.875e-02   2.592   0.0124 *  
## age             1.412e-02  6.872e-03   2.055   0.0449 *  
## age2           -1.573e-04  8.429e-05  -1.867   0.0676 .  
## sexM           -5.897e-02  4.288e-02  -1.375   0.1749    
## batchASDbrain2 -3.178e-02  5.787e-02  -0.549   0.5852    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.133 on 52 degrees of freedom
## Multiple R-squared:  0.2685, Adjusted R-squared:  0.1982 
## F-statistic: 3.818 on 5 and 52 DF,  p-value: 0.005083
## 
## 
## Response Inh :
## 
## Call:
## lm(formula = Inh ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% 
##     filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.16288 -0.08607 -0.04305  0.05087  0.43783 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     9.320e-01  1.459e-01   6.387 4.69e-08 ***
## dxCTL          -1.953e-02  4.156e-02  -0.470   0.6404    
## age            -5.631e-03  7.371e-03  -0.764   0.4484    
## age2            8.096e-05  9.041e-05   0.895   0.3747    
## sexM            9.120e-02  4.599e-02   1.983   0.0527 .  
## batchASDbrain2  1.782e-02  6.207e-02   0.287   0.7752    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1426 on 52 degrees of freedom
## Multiple R-squared:  0.1062, Adjusted R-squared:  0.02022 
## F-statistic: 1.235 on 5 and 52 DF,  p-value: 0.3061
## 
## 
## Response Astro :
## 
## Call:
## lm(formula = Astro ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% 
##     filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.56849 -0.11162  0.04274  0.14202  0.27512 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)     1.013e-01  2.026e-01   0.500    0.619
## dxCTL           1.026e-02  5.770e-02   0.178    0.860
## age             4.758e-03  1.023e-02   0.465    0.644
## age2           -4.911e-05  1.255e-04  -0.391    0.697
## sexM           -5.058e-02  6.385e-02  -0.792    0.432
## batchASDbrain2 -6.758e-03  8.618e-02  -0.078    0.938
## 
## Residual standard error: 0.198 on 52 degrees of freedom
## Multiple R-squared:  0.02522,    Adjusted R-squared:  -0.06851 
## F-statistic: 0.2691 on 5 and 52 DF,  p-value: 0.928
## 
## 
## Response Endo :
## 
## Call:
## lm(formula = Endo ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% 
##     filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.12373 -0.04755 -0.00116  0.04032  0.17173 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -5.417e-01  6.950e-02  -7.794  2.7e-10 ***
## dxCTL          -7.020e-03  1.980e-02  -0.355    0.724    
## age            -2.943e-03  3.511e-03  -0.838    0.406    
## age2            2.434e-05  4.306e-05   0.565    0.574    
## sexM           -4.252e-03  2.191e-02  -0.194    0.847    
## batchASDbrain2 -3.498e-03  2.957e-02  -0.118    0.906    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06794 on 52 degrees of freedom
## Multiple R-squared:  0.06643,    Adjusted R-squared:  -0.02334 
## F-statistic:  0.74 on 5 and 52 DF,  p-value: 0.597
## 
## 
## Response Micro :
## 
## Call:
## lm(formula = Micro ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% 
##     filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.247334 -0.040037 -0.002593  0.037348  0.182667 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -3.499e-01  7.965e-02  -4.392 5.53e-05 ***
## dxCTL          -5.545e-02  2.269e-02  -2.444   0.0179 *  
## age            -9.351e-03  4.024e-03  -2.324   0.0241 *  
## age2            8.194e-05  4.935e-05   1.660   0.1029    
## sexM            1.720e-02  2.510e-02   0.685   0.4963    
## batchASDbrain2  1.109e-02  3.388e-02   0.327   0.7447    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07786 on 52 degrees of freedom
## Multiple R-squared:  0.3999, Adjusted R-squared:  0.3422 
## F-statistic: 6.929 on 5 and 52 DF,  p-value: 5.039e-05
## 
## 
## Response OPC :
## 
## Call:
## lm(formula = OPC ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% 
##     filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.16714 -0.04313 -0.01055  0.03759  0.22117 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -8.392e-01  7.384e-02 -11.365 1.04e-15 ***
## dxCTL          -2.868e-02  2.103e-02  -1.364    0.179    
## age            -9.570e-04  3.730e-03  -0.257    0.799    
## age2            1.919e-05  4.575e-05   0.419    0.677    
## sexM            5.408e-03  2.327e-02   0.232    0.817    
## batchASDbrain2  1.313e-02  3.141e-02   0.418    0.678    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07218 on 52 degrees of freedom
## Multiple R-squared:  0.06018,    Adjusted R-squared:  -0.03018 
## F-statistic: 0.666 on 5 and 52 DF,  p-value: 0.6509

8.1.2.2 LIBD

summary(lm(cbind(Exc, Inh, Astro, Endo, Micro, OPC) ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID)))
## Response Exc :
## 
## Call:
## lm(formula = Exc ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == 
##     "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.30930 -0.06894  0.02787  0.11734  0.69250 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.47625    0.01475 100.059   <2e-16 ***
## dxSCZ       -0.02374    0.02175  -1.091    0.276    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2173 on 400 degrees of freedom
## Multiple R-squared:  0.002969,   Adjusted R-squared:  0.0004763 
## F-statistic: 1.191 on 1 and 400 DF,  p-value: 0.2758
## 
## 
## Response Inh :
## 
## Call:
## lm(formula = Inh ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == 
##     "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.40167 -0.08593 -0.01063  0.06652  0.76092 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.43113    0.01066 134.285  < 2e-16 ***
## dxSCZ       -0.04703    0.01571  -2.993  0.00293 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.157 on 400 degrees of freedom
## Multiple R-squared:  0.02191,    Adjusted R-squared:  0.01947 
## F-statistic: 8.961 on 1 and 400 DF,  p-value: 0.00293
## 
## 
## Response Astro :
## 
## Call:
## lm(formula = Astro ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == 
##     "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.67228 -0.10054  0.01815  0.11845  0.56670 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.51435    0.01285  40.024   <2e-16 ***
## dxSCZ        0.02237    0.01894   1.181    0.238    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1893 on 400 degrees of freedom
## Multiple R-squared:  0.003473,   Adjusted R-squared:  0.0009813 
## F-statistic: 1.394 on 1 and 400 DF,  p-value: 0.2385
## 
## 
## Response Endo :
## 
## Call:
## lm(formula = Endo ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == 
##     "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.23296 -0.06172 -0.01918  0.05239  0.37768 
## 
## Coefficients:
##              Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) -0.736283   0.006545 -112.490   <2e-16 ***
## dxSCZ        0.008716   0.009648    0.903    0.367    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09642 on 400 degrees of freedom
## Multiple R-squared:  0.002036,   Adjusted R-squared:  -0.0004589 
## F-statistic: 0.816 on 1 and 400 DF,  p-value: 0.3669
## 
## 
## Response Micro :
## 
## Call:
## lm(formula = Micro ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == 
##     "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39508 -0.08838 -0.00046  0.07998  0.45376 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.847647   0.008924 -94.989  < 2e-16 ***
## dxSCZ       -0.044959   0.013154  -3.418 0.000696 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1315 on 400 degrees of freedom
## Multiple R-squared:  0.02837,    Adjusted R-squared:  0.02595 
## F-statistic: 11.68 on 1 and 400 DF,  p-value: 0.0006962
## 
## 
## Response OPC :
## 
## Call:
## lm(formula = OPC ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == 
##     "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2837 -0.1360  0.0154  0.1333  0.7990 
## 
## Coefficients:
##             Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) -1.83781    0.01659 -110.809  < 2e-16 ***
## dxSCZ        0.08464    0.02445    3.462 0.000594 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2443 on 400 degrees of freedom
## Multiple R-squared:  0.02909,    Adjusted R-squared:  0.02666 
## F-statistic: 11.99 on 1 and 400 DF,  p-value: 0.0005941
summary(lm(cbind(Exc, Inh, Astro, Endo, Micro, OPC) ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID)))
## Response Exc :
## 
## Call:
## lm(formula = Exc ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.18825 -0.06449  0.03014  0.10607  0.63626 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.280e+00  8.807e-02  14.534  < 2e-16 ***
## dxSCZ           -3.594e-02  2.377e-02  -1.512  0.13128    
## age              9.243e-03  3.324e-03   2.781  0.00568 ** 
## age2            -6.514e-05  3.386e-05  -1.924  0.05510 .  
## sexM            -1.136e-03  2.294e-02  -0.050  0.96052    
## batchLieber_244 -8.171e-02  4.304e-02  -1.898  0.05838 .  
## batchLieber_289 -6.174e-02  4.364e-02  -1.415  0.15795    
## batchLieber_30  -5.257e-02  6.127e-02  -0.858  0.39145    
## batchLieber_315 -1.175e-01  4.754e-02  -2.472  0.01387 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2111 on 393 degrees of freedom
## Multiple R-squared:  0.07575,    Adjusted R-squared:  0.05694 
## F-statistic: 4.026 on 8 and 393 DF,  p-value: 0.0001299
## 
## 
## Response Inh :
## 
## Call:
## lm(formula = Inh ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.33226 -0.08571 -0.02126  0.05564  0.76881 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      1.427e+00  6.066e-02  23.528  < 2e-16 ***
## dxSCZ           -5.332e-02  1.637e-02  -3.257  0.00122 ** 
## age              1.005e-04  2.290e-03   0.044  0.96502    
## age2             7.817e-06  2.332e-05   0.335  0.73770    
## sexM             1.630e-02  1.580e-02   1.032  0.30285    
## batchLieber_244  9.977e-03  2.965e-02   0.337  0.73665    
## batchLieber_289 -3.446e-02  3.006e-02  -1.146  0.25238    
## batchLieber_30   3.523e-02  4.221e-02   0.835  0.40444    
## batchLieber_315 -1.645e-01  3.274e-02  -5.025 7.65e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1454 on 393 degrees of freedom
## Multiple R-squared:  0.1755, Adjusted R-squared:  0.1587 
## F-statistic: 10.46 on 8 and 393 DF,  p-value: 2.57e-13
## 
## 
## Response Astro :
## 
## Call:
## lm(formula = Astro ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.64753 -0.09570  0.01925  0.11307  0.57792 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      4.553e-01  7.797e-02   5.840  1.1e-08 ***
## dxSCZ            2.692e-02  2.104e-02   1.279    0.202    
## age              3.906e-03  2.943e-03   1.327    0.185    
## age2            -4.607e-05  2.998e-05  -1.537    0.125    
## sexM            -2.093e-02  2.031e-02  -1.031    0.303    
## batchLieber_244 -2.738e-02  3.811e-02  -0.719    0.473    
## batchLieber_289  1.406e-02  3.864e-02   0.364    0.716    
## batchLieber_30  -4.280e-02  5.425e-02  -0.789    0.431    
## batchLieber_315  6.712e-02  4.209e-02   1.595    0.112    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1869 on 393 degrees of freedom
## Multiple R-squared:  0.04565,    Adjusted R-squared:  0.02622 
## F-statistic:  2.35 on 8 and 393 DF,  p-value: 0.01777
## 
## 
## Response Endo :
## 
## Call:
## lm(formula = Endo ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.22176 -0.06171 -0.00726  0.04588  0.34506 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -6.661e-01  3.748e-02 -17.773  < 2e-16 ***
## dxSCZ            2.095e-02  1.011e-02   2.072 0.038947 *  
## age             -1.824e-03  1.415e-03  -1.290 0.197878    
## age2             7.491e-08  1.441e-05   0.005 0.995855    
## sexM            -2.522e-02  9.762e-03  -2.583 0.010151 *  
## batchLieber_244  9.900e-03  1.832e-02   0.540 0.589163    
## batchLieber_289  6.355e-02  1.857e-02   3.422 0.000687 ***
## batchLieber_30  -3.273e-02  2.608e-02  -1.255 0.210191    
## batchLieber_315  4.631e-02  2.023e-02   2.289 0.022603 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08984 on 393 degrees of freedom
## Multiple R-squared:  0.1488, Adjusted R-squared:  0.1314 
## F-statistic: 8.584 on 8 and 393 DF,  p-value: 8.451e-11
## 
## 
## Response Micro :
## 
## Call:
## lm(formula = Micro ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.36459 -0.05973 -0.01065  0.06109  0.36642 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -6.132e-01  4.529e-02 -13.541  < 2e-16 ***
## dxSCZ           -2.086e-02  1.222e-02  -1.707   0.0887 .  
## age             -8.573e-03  1.709e-03  -5.016 8.01e-07 ***
## age2             4.164e-05  1.741e-05   2.392   0.0172 *  
## sexM             2.689e-02  1.180e-02   2.280   0.0232 *  
## batchLieber_244  4.293e-02  2.213e-02   1.940   0.0531 .  
## batchLieber_289  4.059e-02  2.244e-02   1.809   0.0713 .  
## batchLieber_30   5.134e-02  3.151e-02   1.629   0.1040    
## batchLieber_315  3.597e-02  2.445e-02   1.472   0.1419    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1086 on 393 degrees of freedom
## Multiple R-squared:  0.3489, Adjusted R-squared:  0.3357 
## F-statistic: 26.33 on 8 and 393 DF,  p-value: < 2.2e-16
## 
## 
## Response OPC :
## 
## Call:
## lm(formula = OPC ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% 
##     filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.29501 -0.12588  0.01039  0.13164  0.70331 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -1.883e+00  9.917e-02 -18.990   <2e-16 ***
## dxSCZ            6.224e-02  2.676e-02   2.326   0.0205 *  
## age             -2.852e-03  3.743e-03  -0.762   0.4465    
## age2             6.167e-05  3.813e-05   1.618   0.1066    
## sexM             4.090e-03  2.583e-02   0.158   0.8743    
## batchLieber_244  4.628e-02  4.847e-02   0.955   0.3402    
## batchLieber_289 -2.201e-02  4.914e-02  -0.448   0.6545    
## batchLieber_30   4.152e-02  6.900e-02   0.602   0.5476    
## batchLieber_315  1.327e-01  5.353e-02   2.478   0.0136 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2377 on 393 degrees of freedom
## Multiple R-squared:  0.09689,    Adjusted R-squared:  0.07851 
## F-statistic:  5.27 on 8 and 393 DF,  p-value: 2.752e-06

8.1.2.3 ROSMAP

summary(lm(cbind(Exc, Inh, Astro, Endo, Micro, OPC) ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == "ROSMAP")))
## Response Exc :
## 
## Call:
## lm(formula = Exc ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == 
##     "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48817 -0.12401 -0.01904  0.11926  0.79840 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.74318    0.01137 153.269   <2e-16 ***
## dxCTL       -0.02783    0.01490  -1.868   0.0622 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.197 on 717 degrees of freedom
## Multiple R-squared:  0.004843,   Adjusted R-squared:  0.003455 
## F-statistic: 3.489 on 1 and 717 DF,  p-value: 0.06217
## 
## 
## Response Inh :
## 
## Call:
## lm(formula = Inh ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == 
##     "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51343 -0.07707 -0.01463  0.06899  0.42160 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.412007   0.006460 218.587   <2e-16 ***
## dxCTL       -0.010929   0.008462  -1.292    0.197    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1119 on 717 degrees of freedom
## Multiple R-squared:  0.002321,   Adjusted R-squared:  0.0009296 
## F-statistic: 1.668 on 1 and 717 DF,  p-value: 0.1969
## 
## 
## Response Astro :
## 
## Call:
## lm(formula = Astro ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == 
##     "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.50563 -0.12266 -0.01961  0.10709  0.68974 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.81328    0.01073  75.800   <2e-16 ***
## dxCTL        0.01159    0.01405   0.824     0.41    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1858 on 717 degrees of freedom
## Multiple R-squared:  0.0009467,  Adjusted R-squared:  -0.0004466 
## F-statistic: 0.6795 on 1 and 717 DF,  p-value: 0.41
## 
## 
## Response Endo :
## 
## Call:
## lm(formula = Endo ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == 
##     "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.33562 -0.05981  0.00009  0.06321  0.49624 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.870188   0.005565 -156.37  < 2e-16 ***
## dxCTL        0.037764   0.007290    5.18 2.88e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09639 on 717 degrees of freedom
## Multiple R-squared:  0.03608,    Adjusted R-squared:  0.03473 
## F-statistic: 26.84 on 1 and 717 DF,  p-value: 2.881e-07
## 
## 
## Response Micro :
## 
## Call:
## lm(formula = Micro ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == 
##     "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.60978 -0.08418 -0.00458  0.09132  0.57273 
## 
## Coefficients:
##              Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) -1.092360   0.008384 -130.285   <2e-16 ***
## dxCTL        0.017055   0.010983    1.553    0.121    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1452 on 717 degrees of freedom
## Multiple R-squared:  0.003352,   Adjusted R-squared:  0.001962 
## F-statistic: 2.411 on 1 and 717 DF,  p-value: 0.1209
## 
## 
## Response OPC :
## 
## Call:
## lm(formula = OPC ~ dx, data = ctp_adjoligo.clr.1e3 %>% filter(study == 
##     "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.79277 -0.15594  0.09778  0.24795  0.67619 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.00592    0.02162 -92.768   <2e-16 ***
## dxCTL       -0.02764    0.02833  -0.976    0.329    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3745 on 717 degrees of freedom
## Multiple R-squared:  0.001327,   Adjusted R-squared:  -6.62e-05 
## F-statistic: 0.9525 on 1 and 717 DF,  p-value: 0.3294
summary(lm(cbind(Exc, Inh, Astro, Endo, Micro, OPC) ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% filter(study == "ROSMAP")))
## Response Exc :
## 
## Call:
## lm(formula = Exc ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% 
##     filter(study == "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55841 -0.11794 -0.00286  0.11007  0.69040 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.0777728  1.6910236  -0.046   0.9633    
## dxCTL        -0.0326284  0.0143983  -2.266   0.0237 *  
## age           0.0451458  0.0409644   1.102   0.2708    
## age2         -0.0002629  0.0002471  -1.064   0.2878    
## sexM         -0.0319264  0.0144380  -2.211   0.0273 *  
## batchROSMAP1 -0.1533422  0.0141226 -10.858   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1822 on 712 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1543, Adjusted R-squared:  0.1483 
## F-statistic: 25.98 on 5 and 712 DF,  p-value: < 2.2e-16
## 
## 
## Response Inh :
## 
## Call:
## lm(formula = Inh ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% 
##     filter(study == "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48842 -0.07664 -0.01010  0.06065  0.40217 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.824e+00  1.018e+00   1.792   0.0736 .  
## dxCTL        -1.654e-02  8.666e-03  -1.908   0.0567 .  
## age          -8.378e-03  2.465e-02  -0.340   0.7341    
## age2          4.467e-05  1.488e-04   0.300   0.7640    
## sexM          2.071e-02  8.690e-03   2.383   0.0174 *  
## batchROSMAP1 -4.149e-02  8.500e-03  -4.881  1.3e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1097 on 712 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.04505,    Adjusted R-squared:  0.03835 
## F-statistic: 6.718 on 5 and 712 DF,  p-value: 3.954e-06
## 
## 
## Response Astro :
## 
## Call:
## lm(formula = Astro ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% 
##     filter(study == "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.57579 -0.10379  0.00002  0.10353  0.57667 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.9954570  1.5381785  -1.297   0.1950    
## dxCTL        -0.0028865  0.0130969  -0.220   0.8256    
## age           0.0715771  0.0372618   1.921   0.0551 .  
## age2         -0.0004354  0.0002248  -1.937   0.0532 .  
## sexM          0.0062888  0.0131330   0.479   0.6322    
## batchROSMAP1 -0.1743368  0.0128461 -13.571   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1658 on 712 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2106, Adjusted R-squared:  0.2051 
## F-statistic: 37.99 on 5 and 712 DF,  p-value: < 2.2e-16
## 
## 
## Response Endo :
## 
## Call:
## lm(formula = Endo ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% 
##     filter(study == "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.33845 -0.05798  0.00350  0.06103  0.46024 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -6.837e-01  8.716e-01  -0.784    0.433    
## dxCTL         3.365e-02  7.421e-03   4.534 6.78e-06 ***
## age          -2.093e-03  2.111e-02  -0.099    0.921    
## age2          4.038e-06  1.274e-04   0.032    0.975    
## sexM         -3.366e-02  7.442e-03  -4.523 7.14e-06 ***
## batchROSMAP1 -3.397e-02  7.279e-03  -4.667 3.64e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.09393 on 712 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.09086,    Adjusted R-squared:  0.08448 
## F-statistic: 14.23 on 5 and 712 DF,  p-value: 2.714e-13
## 
## 
## Response Micro :
## 
## Call:
## lm(formula = Micro ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% 
##     filter(study == "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.62960 -0.08482 -0.00259  0.09109  0.58138 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   1.6751665  1.3364847   1.253  0.21047   
## dxCTL         0.0105606  0.0113796   0.928  0.35371   
## age          -0.0646936  0.0323758  -1.998  0.04607 * 
## age2          0.0003769  0.0001953   1.929  0.05407 . 
## sexM          0.0303318  0.0114109   2.658  0.00803 **
## batchROSMAP1 -0.0112445  0.0111616  -1.007  0.31408   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.144 on 712 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.02649,    Adjusted R-squared:  0.01965 
## F-statistic: 3.875 on 5 and 712 DF,  p-value: 0.001798
## 
## 
## Response OPC :
## 
## Call:
## lm(formula = OPC ~ dx + age + age2 + sex + batch, data = ctp_adjoligo.clr.1e3 %>% 
##     filter(study == "ROSMAP"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.51132 -0.11639  0.04858  0.18850  0.77992 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.7418047  2.9456009  -0.252    0.801    
## dxCTL         0.0078429  0.0250805   0.313    0.755    
## age          -0.0415578  0.0713561  -0.582    0.560    
## age2          0.0002727  0.0004305   0.633    0.527    
## sexM          0.0082556  0.0251496   0.328    0.743    
## batchROSMAP1  0.4143876  0.0246002  16.845   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3174 on 712 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.2875, Adjusted R-squared:  0.2825 
## F-statistic: 57.46 on 5 and 712 DF,  p-value: < 2.2e-16

8.2 Neurons vs glia (no clr)

summary(lm(Glial ~ dx, data = ctp %>% filter(study == "ASDbrain")))
## 
## Call:
## lm(formula = Glial ~ dx, data = ctp %>% filter(study == "ASDbrain"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.08559 -0.03815 -0.01436  0.02309  0.16634 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.588293   0.008799  66.856   <2e-16 ***
## dxCTL       -0.003401   0.013354  -0.255      0.8    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0577 on 74 degrees of freedom
## Multiple R-squared:  0.0008757,  Adjusted R-squared:  -0.01263 
## F-statistic: 0.06486 on 1 and 74 DF,  p-value: 0.7997
summary(lm(Glial ~ dx, data = ctp %>% filter(study == "LIBD")))
## 
## Call:
## lm(formula = Glial ~ dx, data = ctp %>% filter(study == "LIBD"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.132700 -0.043370 -0.007019  0.033785  0.240464 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.625528   0.003815 163.962   <2e-16 ***
## dxSCZ       -0.003258   0.006097  -0.534    0.593    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06486 on 473 degrees of freedom
## Multiple R-squared:  0.0006033,  Adjusted R-squared:  -0.00151 
## F-statistic: 0.2855 on 1 and 473 DF,  p-value: 0.5934
summary(lm(Glial ~ dx, data = ctp %>% filter(study == "ROSMAP")))
## 
## Call:
## lm(formula = Glial ~ dx, data = ctp %>% filter(study == "ROSMAP"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.109166 -0.026815 -0.002024  0.023203  0.179770 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.516193   0.002345 220.162  < 2e-16 ***
## dxCTL       0.011713   0.003071   3.814 0.000149 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04061 on 717 degrees of freedom
## Multiple R-squared:  0.01988,    Adjusted R-squared:  0.01851 
## F-statistic: 14.54 on 1 and 717 DF,  p-value: 0.0001487
#summary(lm(mcc_NeuN_neg ~ dx, data = ctp))
#summary(lm(h_NeuN_neg ~ dx, data = ctp))

summary(lm(Glial ~ dx + age + age2 + sex + batch, data = ctp %>% filter(study =="ASDbrain")))
## 
## Call:
## lm(formula = Glial ~ dx + age + age2 + sex + batch, data = ctp %>% 
##     filter(study == "ASDbrain"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.08206 -0.03987 -0.01735  0.02535  0.16575 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     5.290e-01  2.805e-02  18.861   <2e-16 ***
## dxCTL          -6.311e-03  1.604e-02  -0.394   0.6951    
## age             2.568e-03  1.432e-03   1.793   0.0772 .  
## age2           -2.918e-05  1.828e-05  -1.596   0.1150    
## sexM            2.021e-02  1.674e-02   1.207   0.2315    
## batchASDbrain2  1.501e-02  2.296e-02   0.654   0.5154    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05708 on 70 degrees of freedom
## Multiple R-squared:  0.07513,    Adjusted R-squared:  0.009064 
## F-statistic: 1.137 on 5 and 70 DF,  p-value: 0.349
summary(lm(Glial ~ dx + age + age2 + sex + batch, data = ctp %>% filter(study =="LIBD")))
## 
## Call:
## lm(formula = Glial ~ dx + age + age2 + sex + batch, data = ctp %>% 
##     filter(study == "LIBD"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.126317 -0.042633 -0.007354  0.032134  0.244239 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      5.898e-01  1.258e-02  46.886  < 2e-16 ***
## dxSCZ           -1.788e-02  6.872e-03  -2.601  0.00958 ** 
## age              1.764e-03  4.983e-04   3.540  0.00044 ***
## age2            -1.222e-05  6.123e-06  -1.996  0.04655 *  
## sexM            -1.711e-03  6.237e-03  -0.274  0.78394    
## batchLieber_244  4.510e-03  1.060e-02   0.426  0.67055    
## batchLieber_289 -1.589e-02  1.124e-02  -1.414  0.15797    
## batchLieber_30   1.887e-02  1.681e-02   1.123  0.26209    
## batchLieber_315 -2.463e-02  1.207e-02  -2.041  0.04184 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06246 on 466 degrees of freedom
## Multiple R-squared:  0.08682,    Adjusted R-squared:  0.07114 
## F-statistic: 5.538 on 8 and 466 DF,  p-value: 1.049e-06
summary(lm(Glial ~ dx + age + age2 + sex + batch, data = ctp %>% filter(study =="ROSMAP")))
## 
## Call:
## lm(formula = Glial ~ dx + age + age2 + sex + batch, data = ctp %>% 
##     filter(study == "ROSMAP"))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.110572 -0.025777 -0.002267  0.024340  0.186223 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   6.176e-01  3.742e-01   1.651  0.09927 . 
## dxCTL         1.034e-02  3.186e-03   3.245  0.00123 **
## age          -2.060e-03  9.064e-03  -0.227  0.82029   
## age2          1.048e-05  5.468e-05   0.192  0.84808   
## sexM          7.630e-03  3.195e-03   2.389  0.01717 * 
## batchROSMAP1 -5.950e-03  3.125e-03  -1.904  0.05730 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04032 on 712 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.03606,    Adjusted R-squared:  0.02929 
## F-statistic: 5.326 on 5 and 712 DF,  p-value: 8.156e-05
#summary(lm(mcc_NeuN_neg ~ dx + age + sex + batch, data = ctp))
#summary(lm(h_NeuN_neg ~ dx + age + sex + batch, data = ctp))